diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 774fa5296..b8808567e 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -2,6 +2,9 @@ name: Quality Check on: [pull_request] +env: + UV_SYSTEM_PYTHON: 1 + jobs: check_code_quality: runs-on: ubuntu-latest @@ -16,15 +19,13 @@ jobs: python-version: "3.12" # Setup venv - - name: Setup venv + uv + - name: Setup uv run: | pip install --upgrade uv - uv venv - name: Install dependencies run: uv pip install "smolagents[quality] @ ." # Equivalent of "make quality" but step by step - - run: uv run ruff check examples src tests utils # linter - - run: uv run ruff format --check examples src tests utils # formatter - - run: uv run python utils/check_tests_in_ci.py \ No newline at end of file + - run: ruff check examples src tests # linter + - run: ruff format --check examples src tests # formatter diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c16a90a72..12a794c7b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,6 +1,13 @@ name: Python tests -on: [pull_request] +on: + pull_request: + push: + branches: + - ci-* + +env: + UV_SYSTEM_PYTHON: 1 jobs: build-ubuntu: @@ -21,99 +28,16 @@ jobs: python-version: ${{ matrix.python-version }} # Setup venv - - name: Setup venv + uv + - name: Setup uv run: | pip install --upgrade uv - uv venv # Install dependencies - name: Install dependencies run: | uv pip install "smolagents[test] @ ." 
- # Run all tests separately for individual feedback - # Use 'if success() || failure()' so that all tests are run even if one failed - # See https://stackoverflow.com/a/62112985 - - name: Import tests - run: | - uv run pytest ./tests/test_import.py - if: ${{ success() || failure() }} - - - name: Agent tests - run: | - uv run pytest ./tests/test_agents.py - if: ${{ success() || failure() }} - - - name: Default tools tests - run: | - uv run pytest ./tests/test_default_tools.py - if: ${{ success() || failure() }} - - # - name: Docs tests # Disabled for now (slow test + requires API keys) - # run: | - # uv run pytest ./tests/test_all_docs.py - - - name: Final answer tests - run: | - uv run pytest ./tests/test_final_answer.py - if: ${{ success() || failure() }} - - - name: Models tests - run: | - uv run pytest ./tests/test_models.py - if: ${{ success() || failure() }} - - - name: Memory tests - run: | - uv run pytest ./tests/test_memory.py - if: ${{ success() || failure() }} - - - name: Monitoring tests - run: | - uv run pytest ./tests/test_monitoring.py - if: ${{ success() || failure() }} - - - name: Local Python executor tests - run: | - uv run pytest ./tests/test_local_python_executor.py - if: ${{ success() || failure() }} - - - name: E2B executor tests - run: | - uv run pytest ./tests/test_e2b_executor.py - if: ${{ success() || failure() }} - - - name: Search tests - run: | - uv run pytest ./tests/test_search.py - if: ${{ success() || failure() }} - - - name: Tools tests - run: | - uv run pytest ./tests/test_tools.py - if: ${{ success() || failure() }} - - - name: Tool validation tests - run: | - uv run pytest ./tests/test_tool_validation.py - if: ${{ success() || failure() }} - - - name: Types tests - run: | - uv run pytest ./tests/test_types.py - if: ${{ success() || failure() }} - - - name: Utils tests - run: | - uv run pytest ./tests/test_utils.py - if: ${{ success() || failure() }} - - - name: Gradio UI tests - run: | - uv run pytest ./tests/test_gradio_ui.py 
- if: ${{ success() || failure() }} - - - name: Function type hints utils tests + # Run tests + - name: Test with pytest run: | - uv run pytest ./tests/test_function_type_hints_utils.py - if: ${{ success() || failure() }} + pytest ./tests/ diff --git a/.gitignore b/.gitignore index 59bba3ae6..b18528112 100644 --- a/.gitignore +++ b/.gitignore @@ -150,3 +150,6 @@ archive/ savedir/ output/ tool_output/ + +# Gradio runtime +.gradio/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a4ff4b8b2..000000000 --- a/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# Base Python image -FROM python:3.12-slim - -# Set working directory -WORKDIR /app - -# Install build dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - zlib1g-dev \ - libjpeg-dev \ - libpng-dev \ - && rm -rf /var/lib/apt/lists/* - -# Copy package files -COPY . /app/ - -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Install the package -RUN pip install -e . - -COPY server.py /app/server.py - -# Expose the port your server will run on -EXPOSE 65432 - -CMD ["python", "/app/server.py"] diff --git a/Makefile b/Makefile index c8e7c04f6..01bb05690 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: quality style test docs utils +.PHONY: quality style test docs check_dirs := examples src tests utils @@ -6,7 +6,6 @@ check_dirs := examples src tests utils quality: ruff check $(check_dirs) ruff format --check $(check_dirs) - python utils/check_tests_in_ci.py # Format source code automatically style: diff --git a/README.md b/README.md index fb853b06e..9d90637fb 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ limitations under the License.

Hugging Face mascot as James Bond -

A smol library to build great agents!

+

Agents that think in code!

@@ -34,15 +34,15 @@ limitations under the License. โœจ **Simplicity**: the logic for agents fits in ~1,000 lines of code (see [agents.py](https://github.com/huggingface/smolagents/blob/main/src/smolagents/agents.py)). We kept abstractions to their minimal shape above raw code! -๐Ÿง‘โ€๐Ÿ’ป **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/). +๐Ÿง‘โ€๐Ÿ’ป **First-class support for Code Agents**. Our [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) writes its actions in code (as opposed to "agents being used to write code"). To make it secure, we support executing in sandboxed environments via [E2B](https://e2b.dev/) or via Docker. -๐Ÿค— **Hub integrations**: you can [share/pull tools to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub), and more is to come! +๐Ÿค— **Hub integrations**: you can [share/pull tools or agents to/from the Hub](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_hub) for instant sharing of the most efficient agents! ๐ŸŒ **Model-agnostic**: smolagents supports any LLM. It can be a local `transformers` or `ollama` model, one of [many providers on the Hub](https://huggingface.co/blog/inference-providers), or any model from OpenAI, Anthropic and many others via our [LiteLLM](https://www.litellm.ai/) integration. ๐Ÿ‘๏ธ **Modality-agnostic**: Agents support text, vision, video, even audio inputs! Cf [this tutorial](https://huggingface.co/docs/smolagents/examples/web_browser) for vision. 
-๐Ÿ› ๏ธ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [Anthropic's MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool. +๐Ÿ› ๏ธ **Tool-agnostic**: you can use tools from [LangChain](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_langchain), [MCP](https://huggingface.co/docs/smolagents/reference/tools#smolagents.ToolCollection.from_mcp), you can even use a [Hub Space](https://huggingface.co/docs/smolagents/reference/tools#smolagents.Tool.from_space) as a tool. Full documentation can be found [here](https://huggingface.co/docs/smolagents/index). @@ -57,9 +57,9 @@ pip install smolagents ``` Then define your agent, give it the tools it needs and run it! ```py -from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel +from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel -model = HfApiModel() +model = InferenceClientModel() agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") @@ -67,7 +67,7 @@ agent.run("How many seconds would it take for a leopard at full speed to run thr https://github.com/user-attachments/assets/cd0226e2-7479-4102-aea0-57c22ca47884 -You can even share your agent to hub: +You can even share your agent to the Hub, as a Space repository: ```py agent.push_to_hub("m-ric/my_agent") @@ -77,12 +77,12 @@ agent.push_to_hub("m-ric/my_agent") Our library is LLM-agnostic: you could switch the example above to any inference provider.
- HfApiModel, gateway for 4 inference providers + InferenceClientModel, gateway for all inference providers supported on HF ```py -from smolagents import HfApiModel +from smolagents import InferenceClientModel -model = HfApiModel( +model = InferenceClientModel( model_id="deepseek-ai/DeepSeek-R1", provider="together", ) @@ -95,7 +95,7 @@ model = HfApiModel( from smolagents import LiteLLMModel model = LiteLLMModel( - "anthropic/claude-3-5-sonnet-latest", + model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, api_key=os.environ["ANTHROPIC_API_KEY"] ) @@ -143,6 +143,18 @@ model = AzureOpenAIServerModel( ) ```
+
+ Amazon Bedrock models + +```py +import os +from smolagents import AmazonBedrockServerModel + +model = AmazonBedrockServerModel( + model_id = os.environ.get("AMAZON_BEDROCK_MODEL_ID") +) +``` +
## CLI @@ -151,7 +163,7 @@ You can run agents from CLI using two commands: `smolagent` and `webagent`. `smolagent` is a generalist command to run a multi-step `CodeAgent` that can be equipped with various tools. ```bash -smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "HfApiModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search" +smolagent "Plan a trip to Tokyo, Kyoto and Osaka between Mar 28 and Apr 7." --model-type "InferenceClientModel" --model-id "Qwen/Qwen2.5-Coder-32B-Instruct" --imports "pandas numpy" --tools "web_search" ``` Meanwhile `webagent`ย is a specific web-browsing agent using [helium](https://github.com/mherrmann/helium) (read more [here](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)). @@ -201,7 +213,7 @@ Writing actions as code snippets is demonstrated to work better than the current Especially, since code execution can be a security concern (arbitrary code execution!), we provide options at runtime: - a secure python interpreter to run code more safely in your environment (more secure than raw code execution but still risky) - - a sandboxed environment using [E2B](https://e2b.dev/) (removes the risk to your own system). + - a sandboxed environment using [E2B](https://e2b.dev/) or Docker (removes the risk to your own system). On top of this [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) class, we still support the standard [`ToolCallingAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.ToolCallingAgent) that writes actions as JSON/text blobs. But we recommend always using `CodeAgent`. @@ -216,7 +228,7 @@ By the way, why use a framework at all? 
Well, because a big part of this stuff i We've created [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/agents#smolagents.CodeAgent) instances with some leading models, and compared them on [this benchmark](https://huggingface.co/datasets/m-ric/agents_medium_benchmark_2) that gathers questions from a few different benchmarks to propose a varied blend of challenges. -[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/benchmark.ipynb) for more detail on the agentic setup used, and see a comparison of using LLMs code agents compared to vanilla (spoilers: code agents works better). +[Find the benchmarking code here](https://github.com/huggingface/smolagents/blob/main/examples/smolagents_benchmark/run.py) for more detail on the agentic setup used, and see a comparison of using LLMs code agents compared to vanilla (spoilers: code agents works better).

benchmark of different models on agentic workflows. Open model DeepSeek-R1 beats closed-source models. @@ -224,6 +236,14 @@ We've created [`CodeAgent`](https://huggingface.co/docs/smolagents/reference/age This comparison shows that open-source models can now take on the best closed models! +## Security + +Security is a critical consideration when working with code-executing agents. Our library provides: +- Sandboxed execution options using [E2B](https://e2b.dev/) or Docker +- Best practices for running agent code securely + +For security policies, vulnerability reporting, and more information on secure agent execution, please see our [Security Policy](SECURITY.md). + ## Contribute Everyone is welcome to contribute, get started with our [contribution guide](https://github.com/huggingface/smolagents/blob/main/CONTRIBUTING.md). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..0a55a5631 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,9 @@ +# Security Policy + +## Reporting a Vulnerability + +To report a security vulnerability, please contact: security@huggingface.co + +## Learning More About Security + +To learn more about running agents more securely, please see the [Secure Code Execution tutorial](docs/source/en/tutorials/secure_code_execution.mdx) which covers sandboxing with E2B and Docker. diff --git a/docs/README.md b/docs/README.md index be716450b..af4b61c6c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -121,10 +121,6 @@ Adding a new tutorial or section is done in two steps: Make sure to put your new file under the proper section. If you have a doubt, feel free to ask in a Github Issue or PR. -### Translating - -When translating, refer to the guide at [./TRANSLATING.md](https://github.com/huggingface/smolagents/blob/main/docs/TRANSLATING.md). - ### Writing source documentation Values that should be put in `code` should either be surrounded by backticks: \`like so\`. 
Note that argument names @@ -271,4 +267,5 @@ is to be used in inference and also include the expected (ideally sensible) output. Often, readers will try out the example before even going through the function or class definitions. Therefore, it is of utmost importance that the example -works as expected. \ No newline at end of file +works as expected. + diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index c1efd31dc..c5c2a9a93 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -13,7 +13,7 @@ - local: tutorials/tools title: ๐Ÿ› ๏ธ Tools - in-depth guide - local: tutorials/secure_code_execution - title: ๐Ÿ›ก๏ธ Secure your code execution with E2B + title: ๐Ÿ›ก๏ธ Secure code execution - local: tutorials/memory title: ๐Ÿ“š Manage your agent's memory - title: Conceptual guides @@ -27,7 +27,7 @@ - local: examples/text_to_sql title: Self-correcting Text-to-SQL - local: examples/rag - title: Master you knowledge base with agentic RAG + title: Master your knowledge base with agentic RAG - local: examples/multiagents title: Orchestrate a multi-agent system - local: examples/web_browser diff --git a/docs/source/en/conceptual_guides/intro_agents.mdx b/docs/source/en/conceptual_guides/intro_agents.mdx index ca5ad31c5..ef76b103e 100644 --- a/docs/source/en/conceptual_guides/intro_agents.mdx +++ b/docs/source/en/conceptual_guides/intro_agents.mdx @@ -1,18 +1,3 @@ - # Introduction to Agents ## ๐Ÿค”ย What are agents? 
@@ -28,13 +13,14 @@ Note that with this definition, "agent" is not a discrete, 0 or 1 definition: in See in the table below how agency can vary across systems: -| Agency Level | Description | How that's called | Example Pattern | -| ------------ | ------------------------------------------------------- | ----------------- | -------------------------------------------------- | -| โ˜†โ˜†โ˜† | LLM output has no impact on program flow | Simple Processor | `process_llm_output(llm_response)` | -| โ˜…โ˜†โ˜† | LLM output determines an if/else switch | Router | `if llm_decision(): path_a() else: path_b()` | -| โ˜…โ˜…โ˜† | LLM output determines function execution | Tool Caller | `run_function(llm_chosen_tool, llm_chosen_args)` | -| โ˜…โ˜…โ˜… | LLM output controls iteration and program continuation | Multi-step Agent | `while llm_should_continue(): execute_next_step()` | -| โ˜…โ˜…โ˜… | One agentic workflow can start another agentic workflow | Multi-Agent | `if llm_trigger(): execute_agent()` | +| Agency Level | Description | Short name | Example Code | +| ------------ | ------------------------------------------------------ | ---------------- | -------------------------------------------------- | +| โ˜†โ˜†โ˜† | LLM output has no impact on program flow | Simple processor | `process_llm_output(llm_response)` | +| โ˜…โ˜†โ˜† | LLM output controls an if/else switch | Router | `if llm_decision(): path_a() else: path_b()` | +| โ˜…โ˜…โ˜† | LLM output controls function execution | Tool call | `run_function(llm_chosen_tool, llm_chosen_args)` | +| โ˜…โ˜…โ˜† | LLM output controls iteration and program continuation | Multi-step Agent | `while llm_should_continue(): execute_next_step()` | +| โ˜…โ˜…โ˜… | One agentic workflow can start another agentic workflow | Multi-Agent | `if llm_trigger(): execute_agent()` | +| โ˜…โ˜…โ˜… | LLM acts in code, can define its own tools / start other agents | Code Agents | `def custom_tool(args): ...` | The multi-step agent has this code structure: diff 
--git a/docs/source/en/conceptual_guides/react.mdx b/docs/source/en/conceptual_guides/react.mdx index b86c438e2..6358c78fd 100644 --- a/docs/source/en/conceptual_guides/react.mdx +++ b/docs/source/en/conceptual_guides/react.mdx @@ -1,18 +1,3 @@ - # How do multi-step agents work? The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents. diff --git a/docs/source/en/examples/multiagents.mdx b/docs/source/en/examples/multiagents.mdx index 4f41fe8e6..4e43f99f5 100644 --- a/docs/source/en/examples/multiagents.mdx +++ b/docs/source/en/examples/multiagents.mdx @@ -1,18 +1,3 @@ - # Orchestrate a multi-agent system ๐Ÿค–๐Ÿค๐Ÿค– [[open-in-colab]] @@ -39,19 +24,19 @@ Let's set up this system. Run the line below to install the required dependencies: -``` -!pip install markdownify duckduckgo-search smolagents --upgrade -q +```py +! pip install markdownify duckduckgo-search smolagents --upgrade -q ``` -Let's login in order to call the HF Inference API: +Let's login to HF in order to call Inference Providers: -``` +```py from huggingface_hub import login login() ``` -โšก๏ธ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `HfApiModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model. +โšก๏ธ Our agent will be powered by [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) using `InferenceClientModel` class that uses HF's Inference API: the Inference API allows to quickly and easily run any OS model. _Note:_ The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models). @@ -123,19 +108,19 @@ Which configuration to choose for this agent? 
from smolagents import ( CodeAgent, ToolCallingAgent, - HfApiModel, + InferenceClientModel, DuckDuckGoSearchTool, LiteLLMModel, ) -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), visit_webpage], model=model, max_steps=10, - name="search", - description="Runs web searches for you. Give it your query as an argument.", + name="web_search_agent", + description="Runs web searches for you.", ) ``` diff --git a/docs/source/en/examples/rag.mdx b/docs/source/en/examples/rag.mdx index eb1c4c27f..212d38cb7 100644 --- a/docs/source/en/examples/rag.mdx +++ b/docs/source/en/examples/rag.mdx @@ -1,18 +1,3 @@ - # Agentic RAG [[open-in-colab]] @@ -37,7 +22,7 @@ Run the line below to install required dependencies: ```bash !pip install smolagents pandas langchain langchain-community sentence-transformers datasets python-dotenv rank_bm25 --upgrade -q ``` -To call the HF Inference API, you will need a valid token as your environment variable `HF_TOKEN`. +To call Inference Providers, you will need a valid token as your environment variable `HF_TOKEN`. We use python-dotenv to load it. ```py from dotenv import load_dotenv @@ -127,13 +112,13 @@ The agent will need these arguments upon initialization: - `model`: the LLM that powers the agent. Our `model` must be a callable that takes as input a list of messages and returns text. It also needs to accept a stop_sequences argument that indicates when to stop its generation. For convenience, we directly use the HfEngine class provided in the package to get a LLM engine that calls Hugging Face's Inference API. ->[!NOTE] To use a specific model, pass it like this: `HfApiModel("meta-llama/Llama-3.3-70B-Instruct")`. The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models). 
+>[!NOTE] To use a specific model, pass it like this: `InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")`. The Inference API hosts models based on various criteria, and deployed models may be updated or replaced without prior notice. Learn more about it [here](https://huggingface.co/docs/api-inference/supported-models). ```py -from smolagents import HfApiModel, CodeAgent +from smolagents import InferenceClientModel, CodeAgent agent = CodeAgent( - tools=[retriever_tool], model=HfApiModel(), max_steps=4, verbosity_level=2 + tools=[retriever_tool], model=InferenceClientModel(), max_steps=4, verbosity_level=2 ) ``` Upon initializing the CodeAgent, it has been automatically given a default system prompt that tells the LLM engine to process step-by-step and generate tool calls as code snippets, but you could replace this prompt template with your own as needed. diff --git a/docs/source/en/examples/text_to_sql.mdx b/docs/source/en/examples/text_to_sql.mdx index 600d8d95c..5cd93479c 100644 --- a/docs/source/en/examples/text_to_sql.mdx +++ b/docs/source/en/examples/text_to_sql.mdx @@ -1,18 +1,3 @@ - # Text-to-SQL [[open-in-colab]] @@ -31,7 +16,7 @@ Run the line below to install required dependencies: ```bash !pip install smolagents python-dotenv sqlalchemy --upgrade -q ``` -To call the HF Inference API, you will need a valid token as your environment variable `HF_TOKEN`. +To call Inference Providers, you will need a valid token as your environment variable `HF_TOKEN`. We use python-dotenv to load it. ```py from dotenv import load_dotenv @@ -137,14 +122,14 @@ Now let us create an agent that leverages this tool. We use the `CodeAgent`, which is smolagentsโ€™ main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework. -The model is the LLM that powers the agent system. 
`HfApiModel` allows you to call LLMs using HFโ€™s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API. +The model is the LLM that powers the agent system. `InferenceClientModel` allows you to call LLMs using HFโ€™s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API. ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"), + model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), ) agent.run("Can you give me the name of the client who got the most expensive receipt?") ``` @@ -197,7 +182,7 @@ sql_engine.description = updated_description agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"), ) agent.run("Which waiter got more total money from tips?") diff --git a/docs/source/en/examples/web_browser.mdx b/docs/source/en/examples/web_browser.mdx index fe2fc67de..1f464be9a 100644 --- a/docs/source/en/examples/web_browser.mdx +++ b/docs/source/en/examples/web_browser.mdx @@ -111,11 +111,11 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: Now let's create our web automation agent: ```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel # Initialize the model model_id = "meta-llama/Llama-3.3-70B-Instruct" # You can change this to your preferred model -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) # Create the agent agent = CodeAgent( diff --git a/docs/source/en/guided_tour.mdx b/docs/source/en/guided_tour.mdx index 5eca7fc21..01e247357 100644 --- a/docs/source/en/guided_tour.mdx +++ b/docs/source/en/guided_tour.mdx @@ -1,18 +1,3 @@ - # Agents - Guided tour [[open-in-colab]] @@ -25,28 +10,29 @@ To 
initialize a minimal agent, you need at least these two arguments: - `model`, a text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses a LLM as its engine. You can use any of these options: - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`. - - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood and supports all Inference Providers on the Hub. + - [`InferenceClientModel`] leverages a `huggingface_hub.InferenceClient` under the hood and supports all Inference Providers on the Hub: Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more. - [`LiteLLMModel`] similarly lets you call 100+ different models and providers through [LiteLLM](https://docs.litellm.ai/)! - [`AzureOpenAIServerModel`] allows you to use OpenAI models deployed in [Azure](https://azure.microsoft.com/en-us/products/ai-services/openai-service). + - [`AmazonBedrockServerModel`] allows you to use Amazon Bedrock in [AWS](https://aws.amazon.com/bedrock/?nc1=h_ls). - [`MLXModel`] creates a [mlx-lm](https://pypi.org/project/mlx-lm/) pipeline to run inference on your local machine. - `tools`, a list of `Tools` that the agent can use to solve the task. It can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`. -Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Inference Providers](https://huggingface.co/blog/inference-providers), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service), or [mlx-lm](https://pypi.org/project/mlx-lm/). 
+Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Inference Providers](https://huggingface.co/blog/inference-providers), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service), [Amazon Bedrock](https://aws.amazon.com/bedrock/?nc1=h_ls), or [mlx-lm](https://pypi.org/project/mlx-lm/). - + -HF Inference API is free to use without a token, but then it will have a rate limit. +Inference Providers need a `HF_TOKEN` to authenticate, but a free HF account already comes with included credits. Upgrade to PRO to raise your included credits. -To access gated models or rise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass `token` variable upon initialization of `HfApiModel`. You can get your token from your [settings page](https://huggingface.co/settings/tokens) +To access gated models or rise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass `token` variable upon initialization of `InferenceClientModel`. You can get your token from your [settings page](https://huggingface.co/settings/tokens) ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -model = HfApiModel(model_id=model_id, token="") # You can choose to not pass any model_id to HfApiModel to use a default free model +model = InferenceClientModel(model_id=model_id, token="") # You can choose to not pass any model_id to InferenceClientModel to use a default model # you can also specify a particular provider e.g. 
provider="together" or provider="sambanova" agent = CodeAgent(tools=[], model=model, add_base_tools=True) @@ -149,6 +135,76 @@ agent.run( ) ``` + + + +The `AmazonBedrockServerModel` class provides native integration with Amazon Bedrock, allowing for direct API calls and comprehensive configuration. + +#### Basic Usage + +```python +# !pip install smolagents[aws_sdk] +from smolagents import CodeAgent, AmazonBedrockServerModel + +model = AmazonBedrockServerModel(model_id="anthropic.claude-3-sonnet-20240229-v1:0") +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` + +#### Advanced Configuration + +```python +import boto3 +from smolagents import AmazonBedrockServerModel + +# Create a custom Bedrock client +bedrock_client = boto3.client( + 'bedrock-runtime', + region_name='us-east-1', + aws_access_key_id='YOUR_ACCESS_KEY', + aws_secret_access_key='YOUR_SECRET_KEY' +) + +additional_api_config = { + "inferenceConfig": { + "maxTokens": 3000 + }, + "guardrailConfig": { + "guardrailIdentifier": "identify1", + "guardrailVersion": 'v1' + }, +} + +# Initialize with comprehensive configuration +model = AmazonBedrockServerModel( + model_id="us.amazon.nova-pro-v1:0", + client=bedrock_client, # Use custom client + **additional_api_config +) + +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` + +#### Using LiteLLMModel + +Alternatively, you can use `LiteLLMModel` with Bedrock models: + +```python +from smolagents import LiteLLMModel, CodeAgent + +model = LiteLLMModel(model_name="bedrock/anthropic.claude-3-sonnet-20240229-v1:0") +agent = CodeAgent(tools=[], model=model) + +agent.run("Explain the concept of quantum computing") +``` + @@ -176,17 +232,22 @@ The Python interpreter also doesn't allow imports by default outside of a safe l You can authorize additional imports by passing the 
authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`CodeAgent`]: ```py -model = HfApiModel() +model = InferenceClientModel() agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4']) agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") ``` +Additionally, as an extra security layer, access to submodules is forbidden by default, unless explicitly authorized within the import list. +For instance, to access the `numpy.random` submodule, you need to add `'numpy.random'` to the `additional_authorized_imports` list. +This could also be authorized by using `numpy.*`, which will allow `numpy` as well as any subpackage like `numpy.random` and its own subpackages. + > [!WARNING] > The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports! The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent. -You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) instead of a local Python interpreter by first [setting the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then passing `use_e2b_executor=True` upon agent initialization. +You can also use [E2B code executor](https://e2b.dev/docs#what-is-e2-b) or Docker instead of a local Python interpreter. For E2B, first [set the `E2B_API_KEY` environment variable](https://e2b.dev/dashboard?tab=keys) and then pass `executor_type="e2b"` upon agent initialization. For Docker, pass `executor_type="docker"` during initialization. + > [!TIP] > Learn more about code execution [in this tutorial](tutorials/secure_code_execution). 
@@ -220,7 +281,7 @@ When the agent is initialized, the tool attributes are used to generate a tool d ### Default toolbox -`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`: +`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools=True`: - **DuckDuckGo web search***: performs a web search using DuckDuckGo browser. - **Python code interpreter**: runs your LLM generated Python code in a secure environment. This tool will only be added to [`ToolCallingAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code @@ -279,6 +340,7 @@ The function needs: - A clear name. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. - Type hints on both inputs and output - A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). Same as for the tool name, this description is an instruction manual for the LLM powering you agent, so do not neglect it. + All these elements will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! 
> [!TIP] @@ -312,8 +374,8 @@ All these attributes will be automatically baked into the agent's system prompt Then you can directly initialize your agent: ```py -from smolagents import CodeAgent, HfApiModel -agent = CodeAgent(tools=[model_download_tool], model=HfApiModel()) +from smolagents import CodeAgent, InferenceClientModel +agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel()) agent.run( "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" ) @@ -326,7 +388,7 @@ You get the following logs: โ”‚ Can you give me the name of the model that has the most downloads in the 'text-to-video' โ”‚ โ”‚ task on the Hugging Face Hub? โ”‚ โ”‚ โ”‚ -โ•ฐโ”€ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ฐโ”€ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” Step 0 โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” โ•ญโ”€ Executing this code: โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ 1 model_name = model_download_tool(task="text-to-video") โ”‚ @@ -364,9 +426,9 @@ Then you can pass this managed agent in the parameter managed_agents upon initia Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]: ```py -from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool +from smolagents import CodeAgent, 
InferenceClientModel, DuckDuckGoSearchTool -model = HfApiModel() +model = InferenceClientModel() web_agent = CodeAgent( tools=[DuckDuckGoSearchTool()], @@ -394,14 +456,14 @@ You can use `GradioUI` to interactively submit tasks to your agent and observe i from smolagents import ( load_tool, CodeAgent, - HfApiModel, + InferenceClientModel, GradioUI ) # Import tool from Hub image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) # Initialize the agent with the image generation tool agent = CodeAgent(tools=[image_generation_tool], model=model) @@ -414,6 +476,9 @@ The `reset=False` flag means the agent's memory is not flushed before launching You can also use this `reset=False` argument to keep the conversation going in any other agentic application. +In gradio UIs, if you want to allow users to interrupt a running agent, you could do this with a button that triggers method `agent.interrupt()`. +This will stop the agent at the end of its current step, then raise an error. + ## Next steps Finally, when you've configured your agent to your needs, you can share it to the Hub! diff --git a/docs/source/en/index.mdx b/docs/source/en/index.mdx index 14f80ff5b..97cc905fc 100644 --- a/docs/source/en/index.mdx +++ b/docs/source/en/index.mdx @@ -1,18 +1,3 @@ - - # `smolagents`

@@ -25,7 +10,7 @@ This library offers: โœจ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code! -๐ŸŒ **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API and Inference providers, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. +๐ŸŒ **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through [Inference providers](https://huggingface.co/docs/inference-providers/index): Cerebras, Cohere, Fal, Fireworks, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, etc. It also supports models from OpenAI, Anthropic... it's really easy to power an agent with any LLM. ๐Ÿง‘โ€๐Ÿ’ป **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution). diff --git a/docs/source/en/reference/agents.mdx b/docs/source/en/reference/agents.mdx index a6f57183e..d8f975e34 100644 --- a/docs/source/en/reference/agents.mdx +++ b/docs/source/en/reference/agents.mdx @@ -1,18 +1,3 @@ - # Agents diff --git a/docs/source/en/reference/models.mdx b/docs/source/en/reference/models.mdx index 2a7f8f45d..59816c60e 100644 --- a/docs/source/en/reference/models.mdx +++ b/docs/source/en/reference/models.mdx @@ -1,18 +1,3 @@ - # Models @@ -27,13 +12,17 @@ contains the API docs for the underlying classes. ## Models +### Your custom Model + You're free to create and use your own models to power your agent. -You could use any `model` callable for your agent, as long as: -1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`. -2. 
It stops generating outputs *before* the sequences passed in the argument `stop_sequences`
+You could subclass the base `Model` class to create a model for your agent.
+The main criterion is to override the `generate` method, with these two criteria:
+1. It follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns an object with a `.content` attribute.
+2. It stops generating outputs at the sequences passed in the argument `stop_sequences`.

-For defining your LLM, you can make a `custom_model` method which accepts a list of [messages](./chat_templating) and returns an object with a .content attribute containing the text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
+For defining your LLM, you can make a `CustomModel` class that inherits from the base `Model` class.
+It should have a generate method that takes a list of [messages](./chat_templating) and returns an object with a .content attribute containing the text. The `generate` method also needs to accept a `stop_sequences` argument that indicates when to stop generating.

 ```python
 from huggingface_hub import login, InferenceClient

@@ -44,13 +33,16 @@ model_id = "meta-llama/Llama-3.3-70B-Instruct"

 client = InferenceClient(model=model_id)

-def custom_model(messages, stop_sequences=["Task"]):
-    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
-    answer = response.choices[0].message
-    return answer
+class CustomModel(Model):
+    def generate(self, messages, stop_sequences=["Task"]):
+        response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1024)
+        answer = response.choices[0].message
+        return answer
+
+custom_model = CustomModel()
 ```

-Additionally, `custom_model` can also take a `grammar` argument. 
In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.
+Additionally, `generate` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.

 ### TransformersModel

@@ -72,24 +64,24 @@ print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], st

 [[autodoc]] TransformersModel

-### HfApiModel
+### InferenceClientModel

-The `HfApiModel` wraps huggingface_hub's [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference) for the execution of the LLM. It supports both HF's own [Inference API](https://huggingface.co/docs/api-inference/index) as well as all [Inference Providers](https://huggingface.co/blog/inference-providers) available on the Hub.
+The `InferenceClientModel` wraps huggingface_hub's [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference) for the execution of the LLM. It supports all [Inference Providers](https://huggingface.co/docs/inference-providers/index) available on the Hub: Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more.

 ```python
-from smolagents import HfApiModel
+from smolagents import InferenceClientModel

 messages = [
     {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]}
 ]

-model = HfApiModel()
+model = InferenceClientModel(provider="novita")
 print(model(messages))
 ```

 ```text
 >>> Of course! 
If you change your mind, feel free to reach out. Take care! ``` -[[autodoc]] HfApiModel +[[autodoc]] InferenceClientModel ### LiteLLMModel @@ -103,12 +95,46 @@ messages = [ {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} ] -model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) print(model(messages)) ``` [[autodoc]] LiteLLMModel +### LiteLLMRouterModel + +The `LiteLLMRouterModel` is a wrapper around the [LiteLLM Router](https://docs.litellm.ai/docs/routing) that leverages +advanced routing strategies: load-balancing across multiple deployments, prioritizing critical requests via queueing, +and implementing basic reliability measures such as cooldowns, fallbacks, and exponential backoff retries. + +```python +from smolagents import LiteLLMRouterModel + +messages = [ + {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} +] + +model = LiteLLMRouterModel( + model_id="llama-3.3-70b", + model_list=[ + { + "model_name": "llama-3.3-70b", + "litellm_params": {"model": "groq/llama-3.3-70b", "api_key": os.getenv("GROQ_API_KEY")}, + }, + { + "model_name": "llama-3.3-70b", + "litellm_params": {"model": "cerebras/llama-3.3-70b", "api_key": os.getenv("CEREBRAS_API_KEY")}, + }, + ], + client_kwargs={ + "routing_strategy": "simple-shuffle", + }, +) +print(model(messages)) +``` + +[[autodoc]] LiteLLMRouterModel + ### OpenAIServerModel This class lets you call any OpenAIServer compatible model. @@ -149,6 +175,24 @@ model = AzureOpenAIServerModel( [[autodoc]] AzureOpenAIServerModel +### AmazonBedrockServerModel + +`AmazonBedrockServerModel` helps you connect to Amazon Bedrock and run your agent with any available models. + +Below is an example setup. This class also offers additional options for customization. 
+ +```py +import os + +from smolagents import AmazonBedrockServerModel + +model = AmazonBedrockServerModel( + model_id = os.environ.get("AMAZON_BEDROCK_MODEL_ID"), +) +``` + +[[autodoc]] AmazonBedrockServerModel + ### MLXModel @@ -167,3 +211,20 @@ print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) > You must have `mlx-lm` installed on your machine. Please run `pip install smolagents[mlx-lm]` if it's not the case. [[autodoc]] MLXModel + +### VLLMModel + +Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving. + +```python +from smolagents import VLLMModel + +model = VLLMModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct") + +print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) +``` + +> [!TIP] +> You must have `vllm` installed on your machine. Please run `pip install smolagents[vllm]` if it's not the case. + +[[autodoc]] VLLMModel diff --git a/docs/source/en/reference/tools.mdx b/docs/source/en/reference/tools.mdx index 68c70b897..a5d217bb8 100644 --- a/docs/source/en/reference/tools.mdx +++ b/docs/source/en/reference/tools.mdx @@ -1,18 +1,3 @@ - # Tools @@ -77,6 +62,10 @@ contains the API docs for the underlying classes. 
[[autodoc]] ToolCollection +## MCP Client + +[[autodoc]] smolagents.mcp_client.MCPClient + ## Agent Types Agents can handle any type of object in-between tools; tools, being completely multimodal, can accept and return diff --git a/docs/source/en/tutorials/building_good_agents.mdx b/docs/source/en/tutorials/building_good_agents.mdx index 8c17de1af..53bda8f92 100644 --- a/docs/source/en/tutorials/building_good_agents.mdx +++ b/docs/source/en/tutorials/building_good_agents.mdx @@ -1,18 +1,3 @@ - # Building good agents [[open-in-colab]] @@ -43,7 +28,7 @@ This leads to a few takeaways: ### Improve the information flow to the LLM engine -Remember that your LLM engine is like an *intelligent* robot, tapped into a room with the only communication with the outside world being notes passed under a door. +Remember that your LLM engine is like an *intelligent* robot, trapped into a room with the only communication with the outside world being notes passed under a door. It won't know of anything that happened if you don't explicitly put that into its prompt. @@ -120,11 +105,11 @@ In general, to ease the load on your LLM, the good question to ask yourself is: To pass some additional objects to your agent beyond the simple string describing the task, you can use the `additional_args` argument to pass any type of object: ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True) +agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True) agent.run( "Why does Mike not know many people in New York?", @@ -210,13 +195,153 @@ In the end you have to return a final answer using the `final_answer` tool. Here are a few examples using notional tools: --- -{examples} +Task: "Generate an image of the oldest person in this document." 
-Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: +Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer. +Code: +```py +answer = document_qa(document=document, question="Who is the oldest person mentioned?") +print(answer) +``` +Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland." + +Thought: I will now generate an image showcasing the oldest person. +Code: +```py +image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.") +final_answer(image) +``` + +--- +Task: "What is the result of the following operation: 5 + 3 + 1294.678?" + +Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool +Code: +```py +result = 5 + 3 + 1294.678 +final_answer(result) +``` + +--- +Task: +"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French. +You have been provided with these additional arguments, that you can access using the keys as variables in your python code: +{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}" + +Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image. 
+Code: +```py +translated_question = translator(question=question, src_lang="French", tgt_lang="English") +print(f"The translated question is {translated_question}.") +answer = image_qa(image=image, question=translated_question) +final_answer(f"The answer is {answer}") +``` + +--- +Task: +In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer. +What does he say was the consequence of Einstein learning too much math on his creativity, in one word? -{{tool_descriptions}} +Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein") +print(pages) +``` +Observation: +No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein". + +Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam") +print(pages) +``` +Observation: +Found 6 pages: +[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/) + +[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/) + +(truncated) + +Thought: I will read the first 2 pages to know more. +Code: +```py +for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]: + whole_page = visit_webpage(url) + print(whole_page) + print("\n" + "="*80 + "\n") # Print separator between pages +``` +Observation: +Manhattan Project Locations: +Los Alamos, NM +Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. 
In this interview, he discusses his work at +(truncated) + +Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word. +Code: +```py +final_answer("diminished") +``` -{{managed_agents_descriptions}} +--- +Task: "Which city has the highest population: Guangzhou or Shanghai?" + +Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities. +Code: +```py +for city in ["Guangzhou", "Shanghai"]: + print(f"Population {city}:", search(f"{city} population") +``` +Observation: +Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.'] +Population Shanghai: '26 million (2019)' + +Thought: Now I know that Shanghai has the highest population. +Code: +```py +final_answer("Shanghai") +``` + +--- +Task: "What is the current age of the pope, raised to the power 0.36?" + +Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search. +Code: +```py +pope_age_wiki = wiki(query="current pope age") +print("Pope age as per wikipedia:", pope_age_wiki) +pope_age_search = web_search(query="current pope age") +print("Pope age as per google search:", pope_age_search) +``` +Observation: +Pope age: "The pope Francis is currently 88 years old." + +Thought: I know that the pope is 88 years old. Let's compute the result using python code. +Code: +```py +pope_current_age = 88 ** 0.36 +final_answer(pope_current_age) +``` + +Above example were using notional tools that might not exist for you. 
On top of performing computations in the Python code snippets that you create, you only have access to these tools: +{%- for tool in tools.values() %} +- {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} +{%- endfor %} + +{%- if managed_agents and managed_agents.values() | list %} +You can also give tasks to team members. +Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. +Given that this team member is a real human, you should be very verbose in your task. +Here is a list of the team members that you can call: +{%- for agent in managed_agents.values() %} +- {{ agent.name }}: {{ agent.description }} +{%- endfor %} +{%- else %} +{%- endif %} Here are the rules you should always follow to solve your task: 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```' sequence, else you will fail. @@ -225,7 +350,7 @@ Here are the rules you should always follow to solve your task: 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. -7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables. +7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables. 8. 
You can use imports in your code, but only from the following list of modules: {{authorized_imports}} 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. 10. Don't give up! You're in charge of solving the task, not providing directions to solve it. @@ -233,12 +358,30 @@ Here are the rules you should always follow to solve your task: Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. ``` -As you can see, there are placeholders like `"{{tool_descriptions}}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents. - -So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt must contain the following placeholders: -- `"{{tool_descriptions}}"` to insert tool descriptions. -- `"{{managed_agents_description}}"` to insert the description for managed agents if there are any. -- For `CodeAgent` only: `"{{authorized_imports}}"` to insert the list of authorized imports. +As you can see, there are placeholders like `"{{ tool.description }}"`: these will be used upon agent initialization to insert certain automatically generated descriptions of tools or managed agents. + +So while you can overwrite this system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter, your new system prompt can contain the following placeholders: +- To insert tool descriptions: + ``` + {%- for tool in tools.values() %} + - {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} + {%- endfor %} + ``` +- To insert the descriptions for managed agents if there are any: + ``` + {%- if managed_agents and managed_agents.values() | list %} + You can also give tasks to team members. 
+ Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. + Given that this team member is a real human, you should be very verbose in your task. + Here is a list of the team members that you can call: + {%- for agent in managed_agents.values() %} + - {{ agent.name }}: {{ agent.description }} + {%- endfor %} + {%- endif %} + ``` +- For `CodeAgent` only, to insert the list of authorized imports: `"{{authorized_imports}}"` Then you can change the system prompt as follows: @@ -254,7 +397,7 @@ This also works with the [`ToolCallingAgent`]. We provide a model for a supplementary planning step, that an agent can run regularly in-between normal action steps. In this step, there is no tool call, the LLM is simply asked to update a list of facts it knows and to reflect on what steps it should take next based on those facts. ```py -from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool from dotenv import load_dotenv load_dotenv() @@ -266,7 +409,7 @@ search_tool = DuckDuckGoSearchTool() agent = CodeAgent( tools=[search_tool, image_generation_tool], - model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"), planning_interval=3 # This is where you activate planning! 
) diff --git a/docs/source/en/tutorials/inspect_runs.mdx b/docs/source/en/tutorials/inspect_runs.mdx index 4ade8427b..333db728b 100644 --- a/docs/source/en/tutorials/inspect_runs.mdx +++ b/docs/source/en/tutorials/inspect_runs.mdx @@ -1,18 +1,3 @@ - # Inspecting runs with OpenTelemetry [[open-in-colab]] @@ -71,10 +56,10 @@ from smolagents import ( ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, - HfApiModel, + InferenceClientModel, ) -model = HfApiModel() +model = InferenceClientModel() search_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], @@ -160,10 +145,10 @@ from smolagents import ( ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, - HfApiModel, + InferenceClientModel, ) -model = HfApiModel( +model = InferenceClientModel( model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ) diff --git a/docs/source/en/tutorials/memory.mdx b/docs/source/en/tutorials/memory.mdx index 0732d9596..df982da82 100644 --- a/docs/source/en/tutorials/memory.mdx +++ b/docs/source/en/tutorials/memory.mdx @@ -1,18 +1,3 @@ - # ๐Ÿ“š Manage your agent's memory [[open-in-colab]] @@ -30,9 +15,9 @@ You can also use `agent.replay()`, as follows: After the agent has run: ```py -from smolagents import HfApiModel, CodeAgent +from smolagents import InferenceClientModel, CodeAgent -agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0) +agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=0) result = agent.run("What's the 20th Fibonacci number?") ``` @@ -73,7 +58,7 @@ You can also use step callbacks to dynamically change the agent's memory. Step callbacks can access the `agent` itself in their arguments, so they can access any memory step as highlighted above, and change it if needed. For instance, let's say you are observing screenshots of each step performed by a web browser agent. You want to log the newest screenshot, and remove the images from ancient steps to save on token costs. 
-You culd run something like the following. +You could run something like the following. _Note: this code is incomplete, some imports and object definitions have been removed for the sake of concision, visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._ ```py @@ -115,9 +100,10 @@ This can be useful in case you have tool calls that take days: you can just run This will also let you update the memory on each step. ```py -from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep +from smolagents import InferenceClientModel, CodeAgent, ActionStep, TaskStep -agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1) +agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=1) +agent.python_executor.send_tools({**agent.tools}) print(agent.memory.system_prompt) task = "What is the 20th Fibonacci number?" @@ -145,4 +131,4 @@ while final_answer is None and step_number <= 10: # agent.memory.steps[-1] = ... print("The final answer is:", final_answer) -``` \ No newline at end of file +``` diff --git a/docs/source/en/tutorials/secure_code_execution.mdx b/docs/source/en/tutorials/secure_code_execution.mdx index daa8ee900..8716f63c6 100644 --- a/docs/source/en/tutorials/secure_code_execution.mdx +++ b/docs/source/en/tutorials/secure_code_execution.mdx @@ -1,18 +1,3 @@ - # Secure code execution [[open-in-colab]] @@ -24,12 +9,12 @@ rendered properly in your Markdown viewer. [Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format for tool calling, which is across the industry different shades of "writing actions as a JSON of tools names and arguments to use". -Why is code better? 
Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be laughing at us. +Why is code better? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets were a better way, this package would have been written in JSON snippets and the devil would be laughing at us. Code is just a better way to express actions on a computer. It has better: - **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function? - **Object management:** how do you store the output of an action like `generate_image` in JSON? -- **Generality:** code is built to express simply anything you can do have a computer do. +- **Generality:** code is built to express simply anything you can have a computer do. - **Representation in LLM training corpus:** why not leverage this benediction of the sky that plenty of quality actions have already been included in LLM training corpus? This is illustrated on the figure below, taken from [Executable Code Actions Elicit Better LLM Agents](https://huggingface.co/papers/2402.01030). @@ -38,45 +23,392 @@ This is illustrated on the figure below, taken from [Executable Code Actions Eli This is why we put emphasis on proposing code agents, in this case python agents, which meant putting higher effort on building secure python interpreters. -### Local python interpreter +### Local code execution?? By default, the `CodeAgent` runs LLM-generated code in your environment. -This execution is not done by the vanilla Python interpreter: we've re-built a more secure `LocalPythonInterpreter` from the ground up. 
-This interpreter is designed for security by: - - Restricting the imports to a list explicitly passed by the user - - Capping the number of operations to prevent infinite loops and resource bloating. - - Will not perform any operation that's not pre-defined. -We've used this on many use cases, without ever observing any damage to the environment. +This is inherently risky, LLM-generated code could be harmful to your environment. + +Malicious code execution can occur in several ways: +- **Plain LLM error:** LLMs are still far from perfect and may unintentionally generate harmful commands while attempting to be helpful. While this risk is low, instances have been observed where an LLM attempted to execute potentially dangerous code. +- **Supply chain attack:** Running an untrusted or compromised LLM could expose a system to harmful code generation. While this risk is extremely low when using well-known models on secure inference infrastructure, it remains a theoretical possibility. +- **Prompt injection:** an agent browsing the web could arrive on a malicious website that contains harmful instructions, thus injecting an attack into the agent's memory +- **Exploitation of publicly accessible agents:** Agents exposed to the public can be misused by malicious actors to execute harmful code. Attackers may craft adversarial inputs to exploit the agent's execution capabilities, leading to unintended consequences. +Once malicious code is executed, whether accidentally or intentionally, it can damage the file system, exploit local or cloud-based resources, abuse API services, and even compromise network security. + +One could argue that on the [spectrum of agency](../conceptual_guides/intro_agents), code agents give much higher agency to the LLM on your system than other less agentic setups: this goes hand-in-hand with higher risk. + +So you need to be very mindful of security. 
+
+To improve safety, we propose a range of measures that provide elevated levels of security, at a higher setup cost.
+
+We advise you to keep in mind that no solution will be 100% safe.
+
+
+
+### Our local Python executor
+
+To add a first layer of security, code execution in `smolagents` is not performed by the vanilla Python interpreter.
+We have re-built a more secure `LocalPythonExecutor` from the ground up.
+
+To be precise, this interpreter works by loading the Abstract Syntax Tree (AST) from your Code and executes it operation by operation, making sure to always follow certain rules:
+- By default, imports are disallowed unless they have been explicitly added to an authorization list by the user.
+- Furthermore, access to submodules is disabled by default, and each must be explicitly authorized in the import list as well, or you can pass for instance `numpy.*` to allow both `numpy` and all its subpackages, like `numpy.random` or `numpy.a.b`.
+  - Note that some seemingly innocuous packages like `random` can give access to potentially harmful submodules, as in `random._os`.
+- The total count of elementary operations processed is capped to prevent infinite loops and resource bloating.
+- Any operation that has not been explicitly defined in our custom interpreter will raise an error.
+
+You could try these safeguards as follows:
+
+```py
+from smolagents.local_python_executor import LocalPythonExecutor
+
+# Set up custom executor, authorize package "numpy"
+custom_executor = LocalPythonExecutor(["numpy"])
+
+# Utility for pretty printing errors
+def run_capture_exception(command: str):
+    try:
+        custom_executor(harmful_command)
+    except Exception as e:
+        print("ERROR:\n", e)
+
+# Undefined commands just do not work
+harmful_command="!echo Bad command"
+run_capture_exception(harmful_command)
+# >>> ERROR: invalid syntax (, line 1)
+
+
+# Imports like os will not be performed unless explicitly added to `additional_authorized_imports`
+harmful_command="import os; exit_code = os.system('echo Bad command')"
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'import os' due to: InterpreterError: Import of os is not allowed. Authorized imports are: ['statistics', 'numpy', 'itertools', 'time', 'queue', 'collections', 'math', 'random', 're', 'datetime', 'stat', 'unicodedata']
+
+# Even in authorized imports, potentially harmful packages will not be imported
+harmful_command="import random; random._os.system('echo Bad command')"
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'random._os.system('echo Bad command')' due to: InterpreterError: Forbidden access to module: os
+
+# Infinite loops are interrupted after N operations
+harmful_command="""
+while True:
+    pass
+"""
+run_capture_exception(harmful_command)
+# >>> ERROR: Code execution failed at line 'while True: pass' due to: InterpreterError: Maximum number of 1000000 iterations in While loop exceeded
+```
+
+These safeguards make our interpreter safer.
+We have used it on a diversity of use cases, without ever observing any damage to the environment.
+
+> [!WARNING]
+> It's important to understand that no local python sandbox can ever be completely secure.
While our interpreter provides significant safety improvements over the standard Python interpreter, it is still possible for a determined attacker or a fine-tuned malicious LLM to find vulnerabilities and potentially harm your environment. +> +> For example, if you've allowed packages like `Pillow` to process images, the LLM could generate code that creates thousands of large image files to fill your hard drive. Other advanced escape techniques might exploit deeper vulnerabilities in authorized packages. +> +> Running LLM-generated code in your local environment always carries some inherent risk. The only way to run LLM-generated code with truly robust security isolation is to use remote execution options like E2B or Docker, as detailed below. + +The risk of a malicious attack is low when using well-known LLMs from trusted inference providers, but it is not zero. +For high-security applications or when using less trusted models, you should consider using a remote execution sandbox. + +## Sandbox approaches for secure code execution + +When working with AI agents that execute code, security is paramount. There are two main approaches to sandboxing code execution in smolagents, each with different security properties and capabilities: + + +![Sandbox approaches comparison](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/remote_execution.png) + +1. **Running individual code snippets in a sandbox**: This approach (left side of diagram) only executes the agent-generated Python code snippets in a sandbox while keeping the rest of the agentic system in your local environment. It's simpler to set up using `executor_type="e2b"` or `executor_type="docker"`, but it doesn't support multi-agents and still requires passing state data between your environment and the sandbox. + +2. 
**Running the entire agentic system in a sandbox**: This approach (right side of diagram) runs the entire agentic system, including the agent, model, and tools, within a sandbox environment. This provides better isolation but requires more manual setup and may require passing sensitive credentials (like API keys) to the sandbox environment.
+
+This guide describes how to set up and use both types of sandbox approaches for your agent applications.
+
+### E2B setup
+
+#### Installation
+
+1. Create an E2B account at [e2b.dev](https://e2b.dev)
+2. Install the required packages:
+```bash
+pip install 'smolagents[e2b]'
+```
+
+#### Running your agent in E2B: quick start
+
+We provide a simple way to use an E2B Sandbox: simply add `executor_type="e2b"` to the agent initialization, as follows:
+
+```py
+from smolagents import InferenceClientModel, CodeAgent
+
+agent = CodeAgent(model=InferenceClientModel(), tools=[], executor_type="e2b")
+
+agent.run("Can you give me the 100th Fibonacci number?")
+```
+
+This solution sends the agent state to the server at the start of each `agent.run()`.
+Then the models are called from the local environment, but the generated code will be sent to the sandbox for execution, and only the output will be returned.
+
+This is illustrated in the figure below.

+ sandboxed code execution +

+ -However this solution is not watertight: one could imagine occasions where LLMs fine-tuned for malignant actions could still hurt your environment. For instance if you've allowed an innocuous package like `Pillow` to process images, the LLM could generate thousands of saves of images to bloat your hard drive. -It's certainly not likely if you've chosen the LLM engine yourself, but it could happen. +However, since any call to a [managed agent](../examples/multiagents) would require model calls, since we do not transfer secrets to the remote sandbox, the model call would lack credentials. +Hence this solution does not work (yet) with more complicated multi-agent setups. -So if you want to be extra cautious, you can use the remote code execution option described below. +#### Running your agent in E2B: multi-agents -### E2B code executor +To use multi-agents in an E2B sandbox, you need to run your agents completely from within E2B. -For maximum security, you can use our integration with E2B to run code in a sandboxed environment. This is a remote execution service that runs your code in an isolated container, making it impossible for the code to affect your local environment. +Here is how to do it: -For this, you will need to setup your E2B account and set your `E2B_API_KEY` in your environment variables. Head to [E2B's quickstart documentation](https://e2b.dev/docs/quickstart) for more information. +```python +from e2b_code_interpreter import Sandbox +import os -Then you can install it with `pip install "smolagents[e2b]"`. +# Create the sandbox +sandbox = Sandbox() -Now you're set! +# Install required packages +sandbox.commands.run("pip install smolagents") -To set the code executor to E2B, simply pass the flag `use_e2b_executor=True` when initializing your `CodeAgent`. -Note that you should add all the tool's dependencies in `additional_authorized_imports`, so that the executor installs them. 
+def run_code_raise_errors(sandbox, code: str, verbose: bool = False) -> str:
+    execution = sandbox.run_code(
+        code,
+        envs={'HF_TOKEN': os.getenv('HF_TOKEN')}
+    )
+    if execution.error:
+        execution_logs = "\n".join([str(log) for log in execution.logs.stdout])
+        logs = execution_logs
+        logs += execution.error.traceback
+        raise ValueError(logs)
+    return "\n".join([str(log) for log in execution.logs.stdout])
+# Define your agent application
+agent_code = """
+import os
+from smolagents import CodeAgent, InferenceClientModel
+
+# Initialize the agents
+agent = CodeAgent(
+    model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"),
+    tools=[],
+    name="coder_agent",
+    description="This agent takes care of your difficult algorithmic problems using code."
+)
+
+manager_agent = CodeAgent(
+    model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"),
+    tools=[],
+    managed_agents=[agent],
+)
+
+# Run the agent
+response = manager_agent.run("What's the 20th Fibonacci number?")
+print(response)
+"""
+
+# Run the agent code in the sandbox
+execution_logs = run_code_raise_errors(sandbox, agent_code)
+print(execution_logs)
+```
+
+### Docker setup
+
+#### Installation
+
+1. [Install Docker on your system](https://docs.docker.com/get-started/get-docker/)
+2. Install the required packages:
+```bash
+pip install 'smolagents[docker]'
+```
+
+#### Running your agent in Docker: quick start
+
+Similar to the E2B Sandbox above, to quickly get started with Docker, simply add `executor_type="docker"` to the agent initialization, like:

```py
-from smolagents import CodeAgent, VisitWebpageTool, HfApiModel
+from smolagents import InferenceClientModel, CodeAgent
+
+agent = CodeAgent(model=InferenceClientModel(), tools=[], executor_type="docker")
+
+agent.run("Can you give me the 100th Fibonacci number?")
+```
+
+#### Advanced docker usage
+
+If you want to run multi-agent systems in Docker, you'll need to set up a custom interpreter in a sandbox.
+ +Here is how to setup the a Dockerfile: + +```dockerfile +FROM python:3.10-bullseye + +# Install build dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev && \ + pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir smolagents && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Run with limited privileges +USER nobody + +# Default command +CMD ["python", "-c", "print('Container ready')"] +``` + +Create a sandbox manager to run code: + +```python +import docker +import os +from typing import Optional + +class DockerSandbox: + def __init__(self): + self.client = docker.from_env() + self.container = None + + def create_container(self): + try: + image, build_logs = self.client.images.build( + path=".", + tag="agent-sandbox", + rm=True, + forcerm=True, + buildargs={}, + # decode=True + ) + except docker.errors.BuildError as e: + print("Build error logs:") + for log in e.build_log: + if 'stream' in log: + print(log['stream'].strip()) + raise + + # Create container with security constraints and proper logging + self.container = self.client.containers.run( + "agent-sandbox", + command="tail -f /dev/null", # Keep container running + detach=True, + tty=True, + mem_limit="512m", + cpu_quota=50000, + pids_limit=100, + security_opt=["no-new-privileges"], + cap_drop=["ALL"], + environment={ + "HF_TOKEN": os.getenv("HF_TOKEN") + }, + ) + + def run_code(self, code: str) -> Optional[str]: + if not self.container: + self.create_container() + + # Execute code in container + exec_result = self.container.exec_run( + cmd=["python", "-c", code], + user="nobody" + ) + + # Collect all output + return exec_result.output.decode() if exec_result.output else None + + + def cleanup(self): + if self.container: + try: + self.container.stop() + except docker.errors.NotFound: + # Container already removed, this is expected + pass + except Exception as e: + 
print(f"Error during cleanup: {e}") + finally: + self.container = None # Clear the reference + +# Example usage: +sandbox = DockerSandbox() + +try: + # Define your agent code + agent_code = """ +import os +from smolagents import CodeAgent, InferenceClientModel + +# Initialize the agent agent = CodeAgent( - tools = [VisitWebpageTool()], - model=HfApiModel(), - additional_authorized_imports=["requests", "markdownify"], - use_e2b_executor=True + model=InferenceClientModel(token=os.getenv("HF_TOKEN"), provider="together"), + tools=[] ) -agent.run("What was Abraham Lincoln's preferred pet?") +# Run the agent +response = agent.run("What's the 20th Fibonacci number?") +print(response) +""" + + # Run the code in the sandbox + output = sandbox.run_code(agent_code) + print(output) + +finally: + sandbox.cleanup() ``` -E2B code execution is not compatible with multi-agents at the moment - because having an agent call in a code blob that should be executed remotely is a mess. But we're working on adding it! +### Best practices for sandboxes + +These key practices apply to both E2B and Docker sandboxes: + +- Resource management + - Set memory and CPU limits + - Implement execution timeouts + - Monitor resource usage +- Security + - Run with minimal privileges + - Disable unnecessary network access + - Use environment variables for secrets +- Environment + - Keep dependencies minimal + - Use fixed package versions + - If you use base images, update them regularly + +- Cleanup + - Always ensure proper cleanup of resources, especially for Docker containers, to avoid having dangling containers eating up resources. + +โœจ By following these practices and implementing proper cleanup procedures, you can ensure your agent runs safely and efficiently in a sandboxed environment. 
+ +## Comparing security approaches + +As illustrated in the diagram earlier, both sandboxing approaches have different security implications: + +### Approach 1: Running just the code snippets in a sandbox +- **Pros**: + - Easier to set up with a simple parameter (`executor_type="e2b"` or `executor_type="docker"`) + - No need to transfer API keys to the sandbox + - Better protection for your local environment +- **Cons**: + - Doesn't support multi-agents (managed agents) + - Still requires transferring state between your environment and the sandbox + - Limited to specific code execution + +### Approach 2: Running the entire agentic system in a sandbox +- **Pros**: + - Supports multi-agents + - Complete isolation of the entire agent system + - More flexible for complex agent architectures +- **Cons**: + - Requires more manual setup + - May require transferring sensitive API keys to the sandbox + - Potentially higher latency due to more complex operations + +Choose the approach that best balances your security needs with your application's requirements. For most applications with simpler agent architectures, Approach 1 provides a good balance of security and ease of use. For more complex multi-agent systems where you need full isolation, Approach 2, while more involved to set up, offers better security guarantees. \ No newline at end of file diff --git a/docs/source/en/tutorials/tools.mdx b/docs/source/en/tutorials/tools.mdx index d9da1e94f..a6b24d280 100644 --- a/docs/source/en/tutorials/tools.mdx +++ b/docs/source/en/tutorials/tools.mdx @@ -1,18 +1,3 @@ - # Tools [[open-in-colab]] @@ -82,7 +67,7 @@ In this case, you can build your tool by subclassing [`Tool`] as described above ### Share your tool to the Hub -You can share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with read access. 
+You can share your custom tool to the Hub as a Space repository by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with read access. ```python model_downloads_tool.push_to_hub("{your_username}/hf-model-downloads", token="") @@ -112,7 +97,7 @@ model_download_tool = load_tool( ### Import a Space as a tool -You can directly import a Space from the Hub as a tool using the [`Tool.from_space`] method! +You can directly import a Gradio Space from the Hub as a tool using the [`Tool.from_space`] method! You only need to provide the id of the Space on the Hub, its name, and a description that will help you agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space. @@ -131,12 +116,12 @@ And voilร , here's your image! ๐Ÿ–๏ธ -Then you can use this tool just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it. This example also shows how you can pass additional arguments to the agent. +Then you can use this tool just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it. This example also shows how you can pass additional arguments to the agent. ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel -model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[image_generation_tool], model=model) agent.run( @@ -182,9 +167,9 @@ You can manage an agent's toolbox by adding or replacing a tool in attribute `ag Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox. 
```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel -model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.tools[model_download_tool.name] = model_download_tool @@ -204,7 +189,7 @@ agent.run( ### Use a collection of tools -You can leverage tool collections by using the `ToolCollection` object. It supports loading either a collection from the Hub or an MCP server tools. +You can leverage tool collections by using [`ToolCollection`]. It supports loading either a collection from the Hub or an MCP server tools. #### Tool Collection from a collection in the Hub @@ -229,19 +214,119 @@ To speed up the start, tools are loaded only if called by the agent. Leverage tools from the hundreds of MCP servers available on [glama.ai](https://glama.ai/mcp/servers) or [smithery.ai](https://smithery.ai/). -The MCP servers tools can be loaded in a `ToolCollection` object as follow: +> [!WARNING] +> **Security Warning:** Using MCP servers comes with security risks: +> - **Trust is essential:** Only use MCP servers from trusted sources. Malicious servers can execute harmful code on your machine. +> - **Stdio-based MCP servers** will always execute code on your machine (that's their intended functionality). +> - **SSE-based MCP servers** while the remote MCP servers will not be able to execute code on your machine, still proceed with caution. +> +> Always verify the source and integrity of any MCP server before connecting to it, especially for production environments. + +The MCP servers tools can be loaded with [`ToolCollection.from_mcp`]. 
+For stdio-based MCP servers, pass the server parameters as an instance of `mcp.StdioServerParameters`: ```py from smolagents import ToolCollection, CodeAgent from mcp import StdioServerParameters server_parameters = StdioServerParameters( - command="uv", + command="uvx", args=["--quiet", "pubmedmcp@0.1.3"], env={"UV_PYTHON": "3.12", **os.environ}, ) -with ToolCollection.from_mcp(server_parameters) as tool_collection: +with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection: + agent = CodeAgent(tools=[*tool_collection.tools], model=model, add_base_tools=True) + agent.run("Please find a remedy for hangover.") +``` + +For SSE-based MCP servers, simply pass a dict with parameters to `mcp.client.sse.sse_client`: +```py +from smolagents import ToolCollection, CodeAgent + +with ToolCollection.from_mcp({"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True) as tool_collection: agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True) agent.run("Please find a remedy for hangover.") -``` \ No newline at end of file +``` + +### Use MCP tools with MCPClient directly + +You can also work with MCP tools by using the `MCPClient` directly, which gives you more control over the connection and tool management: + +For stdio-based MCP servers: +```python +from smolagents import MCPClient, CodeAgent +from mcp import StdioServerParameters +import os + +server_parameters = StdioServerParameters( + command="uvx", # Using uvx ensures dependencies are available + args=["--quiet", "pubmedmcp@0.1.3"], + env={"UV_PYTHON": "3.12", **os.environ}, +) + +with MCPClient(server_parameters) as tools: + agent = CodeAgent(tools=tools, model=model, add_base_tools=True) + agent.run("Please find the latest research on COVID-19 treatment.") +``` + +For SSE-based MCP servers: +```python +from smolagents import MCPClient, CodeAgent + +with MCPClient({"url": "http://127.0.0.1:8000/sse"}) as tools: + agent = CodeAgent(tools=tools, model=model, 
add_base_tools=True) + agent.run("Please find a remedy for hangover.") +``` + +You can also manually manage the connection lifecycle with the try...finally pattern: + +```python +from smolagents import MCPClient, CodeAgent +from mcp import StdioServerParameters +import os + +# Initialize server parameters +server_parameters = StdioServerParameters( + command="uvx", + args=["--quiet", "pubmedmcp@0.1.3"], + env={"UV_PYTHON": "3.12", **os.environ}, +) + +# Manually manage the connection +try: + mcp_client = MCPClient(server_parameters) + tools = mcp_client.get_tools() + + # Use the tools with your agent + agent = CodeAgent(tools=tools, model=model, add_base_tools=True) + result = agent.run("What are the recent therapeutic approaches for Alzheimer's disease?") + + # Process the result as needed + print(f"Agent response: {result}") +finally: + # Always ensure the connection is properly closed + mcp_client.disconnect() +``` + +You can also connect to multiple MCP servers at once by passing a list of server parameters: +```python +from smolagents import MCPClient, CodeAgent +from mcp import StdioServerParameters +import os + +server_params1 = StdioServerParameters( + command="uvx", + args=["--quiet", "pubmedmcp@0.1.3"], + env={"UV_PYTHON": "3.12", **os.environ}, +) + +server_params2 = {"url": "http://127.0.0.1:8000/sse"} + +with MCPClient([server_params1, server_params2]) as tools: + agent = CodeAgent(tools=tools, model=model, add_base_tools=True) + agent.run("Please analyze the latest research and suggest remedies for headaches.") +``` + +> [!WARNING] +> **Security Warning:** The same security warnings mentioned for `ToolCollection.from_mcp` apply when using `MCPClient` directly. 
diff --git a/docs/source/hi/conceptual_guides/intro_agents.mdx b/docs/source/hi/conceptual_guides/intro_agents.mdx index 15b93798e..071df435d 100644 --- a/docs/source/hi/conceptual_guides/intro_agents.mdx +++ b/docs/source/hi/conceptual_guides/intro_agents.mdx @@ -1,18 +1,3 @@ - # Agents เค•เคพ เคชเคฐเคฟเคšเคฏ ## ๐Ÿค” Agents เค•เฅเคฏเคพ เคนเฅˆเค‚? diff --git a/docs/source/hi/conceptual_guides/react.mdx b/docs/source/hi/conceptual_guides/react.mdx index 0f17901e8..8c0ce0f27 100644 --- a/docs/source/hi/conceptual_guides/react.mdx +++ b/docs/source/hi/conceptual_guides/react.mdx @@ -1,18 +1,3 @@ - # เคฎเคฒเฅเคŸเฅ€-เคธเฅเคŸเฅ‡เคช เคเคœเฅ‡เค‚เคŸเฅเคธ เค•เฅˆเคธเฅ‡ เค•เคพเคฎ เค•เคฐเคคเฅ‡ เคนเฅˆเค‚? ReAct เคซเฅเคฐเฅ‡เคฎเคตเคฐเฅเค• ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) เคตเคฐเฅเคคเคฎเคพเคจ เคฎเฅ‡เค‚ เคเคœเฅ‡เค‚เคŸเฅเคธ เคฌเคจเคพเคจเฅ‡ เค•เคพ เคฎเฅเค–เฅเคฏ เคฆเฅƒเคทเฅเคŸเคฟเค•เฅ‹เคฃ เคนเฅˆเฅค diff --git a/docs/source/hi/examples/multiagents.mdx b/docs/source/hi/examples/multiagents.mdx index 1e9fcc745..7ee85f92d 100644 --- a/docs/source/hi/examples/multiagents.mdx +++ b/docs/source/hi/examples/multiagents.mdx @@ -1,18 +1,3 @@ - # เคฎเคฒเฅเคŸเฅ€-เคเคœเฅ‡เค‚เคŸ เคธเคฟเคธเฅเคŸเคฎ เค•เคพ เค†เคฏเฅ‹เคœเคจ เค•เคฐเฅ‡เค‚ ๐Ÿค–๐Ÿค๐Ÿค– [[open-in-colab]] @@ -54,7 +39,7 @@ from huggingface_hub import login login() ``` -โšก๏ธ เคนเคฎเคพเคฐเคพ เคเคœเฅ‡เค‚เคŸ [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) เคฆเฅเคตเคพเคฐเคพ เคธเค‚เคšเคพเคฒเคฟเคค เคนเฅ‹เค—เคพ เคœเฅ‹ `HfApiModel` เค•เฅเคฒเคพเคธ เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเคพ เคนเฅˆ เคœเฅ‹ HF เค•เฅ‡ Inference API เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเคพ เคนเฅˆ: Inference API เค•เคฟเคธเฅ€ เคญเฅ€ OS เคฎเฅ‰เคกเคฒ เค•เฅ‹ เคœเคฒเฅเคฆเฅ€ เค”เคฐ เค†เคธเคพเคจเฅ€ เคธเฅ‡ เคšเคฒเคพเคจเฅ‡ เค•เฅ€ เค…เคจเฅเคฎเคคเคฟ เคฆเฅ‡เคคเคพ เคนเฅˆเฅค +โšก๏ธ เคนเคฎเคพเคฐเคพ เคเคœเฅ‡เค‚เคŸ [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) 
เคฆเฅเคตเคพเคฐเคพ เคธเค‚เคšเคพเคฒเคฟเคค เคนเฅ‹เค—เคพ เคœเฅ‹ `InferenceClientModel` เค•เฅเคฒเคพเคธ เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเคพ เคนเฅˆ เคœเฅ‹ HF เค•เฅ‡ Inference API เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเคพ เคนเฅˆ: Inference API เค•เคฟเคธเฅ€ เคญเฅ€ OS เคฎเฅ‰เคกเคฒ เค•เฅ‹ เคœเคฒเฅเคฆเฅ€ เค”เคฐ เค†เคธเคพเคจเฅ€ เคธเฅ‡ เคšเคฒเคพเคจเฅ‡ เค•เฅ€ เค…เคจเฅเคฎเคคเคฟ เคฆเฅ‡เคคเคพ เคนเฅˆเฅค _เคจเฅ‹เคŸ:_ The Inference API เคตเคฟเคญเคฟเคจเฅเคจ เคฎเคพเคจเคฆเค‚เคกเฅ‹เค‚ เค•เฅ‡ เค†เคงเคพเคฐ เคชเคฐ เคฎเฅ‰เคกเคฒ เคนเฅ‹เคธเฅเคŸ เค•เคฐเคคเคพ เคนเฅˆ, เค”เคฐ เคกเคฟเคชเฅเคฒเฅ‰เคฏ เค•เคฟเค เค—เค เคฎเฅ‰เคกเคฒ เคฌเคฟเคจเคพ เคชเฅ‚เคฐเฅเคต เคธเฅ‚เคšเคจเคพ เค•เฅ‡ เค…เคชเคกเฅ‡เคŸ เคฏเคพ เคฌเคฆเคฒเฅ‡ เคœเคพ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค เค‡เคธเค•เฅ‡ เคฌเคพเคฐเฅ‡ เคฎเฅ‡เค‚ เค…เคงเคฟเค• เคœเคพเคจเฅ‡เค‚ [เคฏเคนเคพเค‚](https://huggingface.co/docs/api-inference/supported-models)เฅค @@ -126,13 +111,13 @@ print(visit_webpage("https://en.wikipedia.org/wiki/Hugging_Face")[:500]) from smolagents import ( CodeAgent, ToolCallingAgent, - HfApiModel, + InferenceClientModel, ManagedAgent, DuckDuckGoSearchTool, LiteLLMModel, ) -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), visit_webpage], diff --git a/docs/source/hi/examples/rag.mdx b/docs/source/hi/examples/rag.mdx index 9e7a0e595..478080d8b 100644 --- a/docs/source/hi/examples/rag.mdx +++ b/docs/source/hi/examples/rag.mdx @@ -1,18 +1,3 @@ - # เคเคœเฅ‡เค‚เคŸเคฟเค• RAG [[open-in-colab]] @@ -135,10 +120,10 @@ retriever_tool = RetrieverTool(docs_processed) _เคจเฅ‹เคŸ:_ Inference API เคตเคฟเคญเคฟเคจเฅเคจ เคฎเคพเคจเคฆเค‚เคกเฅ‹เค‚ เค•เฅ‡ เค†เคงเคพเคฐ เคชเคฐ เคฎเฅ‰เคกเคฒ เคนเฅ‹เคธเฅเคŸ เค•เคฐเคคเคพ เคนเฅˆ, เค”เคฐ เคกเคฟเคชเฅเคฒเฅ‰เคฏ เค•เคฟเค เค—เค เคฎเฅ‰เคกเคฒ เคฌเคฟเคจเคพ เคชเฅ‚เคฐเฅเคต เคธเฅ‚เคšเคจเคพ เค•เฅ‡ เค…เคชเคกเฅ‡เคŸ เคฏเคพ เคฌเคฆเคฒเฅ‡ เคœเคพ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค เค‡เคธเค•เฅ‡ เคฌเคพเคฐเฅ‡ เคฎเฅ‡เค‚ เค…เคงเคฟเค• เคœเคพเคจเฅ‡เค‚ 
[เคฏเคนเคพเค‚](https://huggingface.co/docs/api-inference/supported-models) เคชเคขเคผเฅ‡เค‚เฅค ```py -from smolagents import HfApiModel, CodeAgent +from smolagents import InferenceClientModel, CodeAgent agent = CodeAgent( - tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2 + tools=[retriever_tool], model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2 ) ``` diff --git a/docs/source/hi/examples/text_to_sql.mdx b/docs/source/hi/examples/text_to_sql.mdx index 213821ac8..69fc9820c 100644 --- a/docs/source/hi/examples/text_to_sql.mdx +++ b/docs/source/hi/examples/text_to_sql.mdx @@ -1,19 +1,4 @@ - -# Text-to-SQL +# Text-to-SQL [[open-in-colab]] @@ -125,14 +110,14 @@ def sql_engine(query: str) -> str: เคนเคฎ `CodeAgent` เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเฅ‡ เคนเฅˆเค‚, เคœเฅ‹ smolagents เค•เคพ เคฎเฅเค–เฅเคฏ เคเคœเฅ‡เค‚เคŸ เค•เฅเคฒเคพเคธ เคนเฅˆ: เคเค• เคเคœเฅ‡เค‚เคŸ เคœเฅ‹ เค•เฅ‹เคก เคฎเฅ‡เค‚ เคเค•เฅเคถเคจ เคฒเคฟเค–เคคเคพ เคนเฅˆ เค”เคฐ ReAct เคซเฅเคฐเฅ‡เคฎเคตเคฐเฅเค• เค•เฅ‡ เค…เคจเฅเคธเคพเคฐ เคชเคฟเค›เคฒเฅ‡ เค†เค‰เคŸเคชเฅเคŸ เคชเคฐ เคชเฅเคจเคฐเคพเคตเฅƒเคคเฅเคคเคฟ เค•เคฐ เคธเค•เคคเคพ เคนเฅˆเฅค -เคฎเฅ‰เคกเคฒ เคตเคน LLM เคนเฅˆ เคœเฅ‹ เคเคœเฅ‡เค‚เคŸ เคธเคฟเคธเฅเคŸเคฎ เค•เฅ‹ เคธเค‚เคšเคพเคฒเคฟเคค เค•เคฐเคคเคพ เคนเฅˆเฅค `HfApiModel` เค†เคชเค•เฅ‹ HF เค•เฅ‡ Inference API เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเค•เฅ‡ LLM เค•เฅ‹ เค•เฅ‰เคฒ เค•เคฐเคจเฅ‡ เค•เฅ€ เค…เคจเฅเคฎเคคเคฟ เคฆเฅ‡เคคเคพ เคนเฅˆ, เคฏเคพ เคคเฅ‹ เคธเคฐเฅเคตเคฐเคฒเฅ‡เคธ เคฏเคพ เคกเฅ‡เคกเคฟเค•เฅ‡เคŸเฅ‡เคก เคเค‚เคกเคชเฅ‰เค‡เค‚เคŸ เค•เฅ‡ เคฎเคพเคงเฅเคฏเคฎ เคธเฅ‡, เคฒเฅ‡เค•เคฟเคจ เค†เคช เค•เคฟเคธเฅ€ เคญเฅ€ เคชเฅเคฐเฅ‹เคชเฅเคฐเคพเค‡เคŸเคฐเฅ€ API เค•เคพ เคญเฅ€ เค‰เคชเคฏเฅ‹เค— เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค +เคฎเฅ‰เคกเคฒ เคตเคน LLM เคนเฅˆ เคœเฅ‹ เคเคœเฅ‡เค‚เคŸ เคธเคฟเคธเฅเคŸเคฎ เค•เฅ‹ เคธเค‚เคšเคพเคฒเคฟเคค เค•เคฐเคคเคพ เคนเฅˆเฅค `InferenceClientModel` เค†เคชเค•เฅ‹ HF เค•เฅ‡ Inference API เค•เคพ 
เค‰เคชเคฏเฅ‹เค— เค•เคฐเค•เฅ‡ LLM เค•เฅ‹ เค•เฅ‰เคฒ เค•เคฐเคจเฅ‡ เค•เฅ€ เค…เคจเฅเคฎเคคเคฟ เคฆเฅ‡เคคเคพ เคนเฅˆ, เคฏเคพ เคคเฅ‹ เคธเคฐเฅเคตเคฐเคฒเฅ‡เคธ เคฏเคพ เคกเฅ‡เคกเคฟเค•เฅ‡เคŸเฅ‡เคก เคเค‚เคกเคชเฅ‰เค‡เค‚เคŸ เค•เฅ‡ เคฎเคพเคงเฅเคฏเคฎ เคธเฅ‡, เคฒเฅ‡เค•เคฟเคจ เค†เคช เค•เคฟเคธเฅ€ เคญเฅ€ เคชเฅเคฐเฅ‹เคชเฅเคฐเคพเค‡เคŸเคฐเฅ€ API เค•เคพ เคญเฅ€ เค‰เคชเคฏเฅ‹เค— เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"), + model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), ) agent.run("Can you give me the name of the client who got the most expensive receipt?") ``` @@ -188,7 +173,7 @@ sql_engine.description = updated_description agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"), ) agent.run("Which waiter got more total money from tips?") diff --git a/docs/source/hi/guided_tour.mdx b/docs/source/hi/guided_tour.mdx index 745b6643a..1c7f5742e 100644 --- a/docs/source/hi/guided_tour.mdx +++ b/docs/source/hi/guided_tour.mdx @@ -1,18 +1,3 @@ - # Agents - เค—เคพเค‡เคกเฅ‡เคก เคŸเฅ‚เคฐ [[open-in-colab]] @@ -25,7 +10,7 @@ rendered properly in your Markdown viewer. 
- `model`, เค†เคชเค•เฅ‡ เคเคœเฅ‡เค‚เคŸ เค•เฅ‹ เคชเคพเคตเคฐ เคฆเฅ‡เคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เคเค• เคŸเฅ‡เค•เฅเคธเฅเคŸ-เคœเคจเคฐเฅ‡เคถเคจ เคฎเฅ‰เคกเคฒ - เค•เฅเคฏเฅ‹เค‚เค•เคฟ เคเคœเฅ‡เค‚เคŸ เคเค• เคธเคฟเค‚เคชเคฒ LLM เคธเฅ‡ เค…เคฒเค— เคนเฅˆ, เคฏเคน เคเค• เคธเคฟเคธเฅเคŸเคฎ เคนเฅˆ เคœเฅ‹ LLM เค•เฅ‹ เค…เคชเคจเฅ‡ เค‡เค‚เคœเคจ เค•เฅ‡ เคฐเฅ‚เคช เคฎเฅ‡เค‚ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคคเคพ เคนเฅˆเฅค เค†เคช เค‡เคจเคฎเฅ‡เค‚ เคธเฅ‡ เค•เฅ‹เคˆ เคญเฅ€ เคตเคฟเค•เคฒเฅเคช เค‰เคชเคฏเฅ‹เค— เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚: - [`TransformersModel`] `transformers` เคชเคพเค‡เคชเคฒเคพเค‡เคจ เค•เฅ‹ เคชเคนเคฒเฅ‡ เคธเฅ‡ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฐเคคเคพ เคนเฅˆ เคœเฅ‹ `transformers` เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเค•เฅ‡ เค†เคชเค•เฅ€ เคฒเฅ‹เค•เคฒ เคฎเคถเฅ€เคจ เคชเคฐ เค‡เคจเฅเคซเคฐเฅ‡เค‚เคธ เคšเคฒเคพเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เคนเฅ‹เคคเคพ เคนเฅˆเฅค - - [`HfApiModel`] เค…เค‚เคฆเคฐ เคธเฅ‡ `huggingface_hub.InferenceClient` เค•เคพ เคฒเคพเคญ เค‰เค เคพเคคเคพ เคนเฅˆเฅค + - [`InferenceClientModel`] เค…เค‚เคฆเคฐ เคธเฅ‡ `huggingface_hub.InferenceClient` เค•เคพ เคฒเคพเคญ เค‰เค เคพเคคเคพ เคนเฅˆเฅค - [`LiteLLMModel`] เค†เคชเค•เฅ‹ [LiteLLM](https://docs.litellm.ai/) เค•เฅ‡ เคฎเคพเคงเฅเคฏเคฎ เคธเฅ‡ 100+ เค…เคฒเค—-เค…เคฒเค— เคฎเฅ‰เคกเคฒเฅเคธ เค•เฅ‹ เค•เฅ‰เคฒ เค•เคฐเคจเฅ‡ เคฆเฅ‡เคคเคพ เคนเฅˆ! - `tools`, `Tools` เค•เฅ€ เคเค• เคฒเคฟเคธเฅเคŸ เคœเคฟเคธเฅ‡ เคเคœเฅ‡เค‚เคŸ เคŸเคพเคธเฅเค• เค•เฅ‹ เคนเคฒ เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค‰เคชเคฏเฅ‹เค— เค•เคฐ เคธเค•เคคเคพ เคนเฅˆเฅค เคฏเคน เคเค• เค–เคพเคฒเฅ€ เคฒเคฟเคธเฅเคŸ เคนเฅ‹ เคธเค•เคคเฅ€ เคนเฅˆเฅค เค†เคช เค‘เคชเฅเคถเคจเคฒ เค†เคฐเฅเค—เฅเคฏเฅ‚เคฎเฅ‡เค‚เคŸ `add_base_tools=True` เค•เฅ‹ เคชเคฐเคฟเคญเคพเคทเคฟเคค เค•เคฐเค•เฅ‡ เค…เคชเคจเฅ€ `tools` เคฒเคฟเคธเฅเคŸ เค•เฅ‡ เคŠเคชเคฐ เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคŸเฅ‚เคฒเคฌเฅ‰เค•เฅเคธ เคญเฅ€ เคœเฅ‹เคกเคผ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค @@ -37,14 +22,14 @@ rendered properly in your Markdown viewer. 
Hugging Face API เคŸเฅ‹เค•เคจ เค•เฅ‡ เคฌเคฟเคจเคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เคฎเฅเคซเฅเคค เคนเฅˆ, เคฒเฅ‡เค•เคฟเคจ เคซเคฟเคฐ เค‡เคธเคฎเฅ‡เค‚ เคฐเฅ‡เคŸ เคฒเคฟเคฎเคฟเคŸเฅ‡เคถเคจ เคนเฅ‹เค—เฅ€เฅค -เค—เฅ‡เคŸเฅ‡เคก เคฎเฅ‰เคกเคฒเฅเคธ เคคเค• เคชเคนเฅเค‚เคšเคจเฅ‡ เคฏเคพ PRO เค…เค•เคพเค‰เค‚เคŸ เค•เฅ‡ เคธเคพเคฅ เค…เคชเคจเฅ€ เคฐเฅ‡เคŸ เคฒเคฟเคฎเคฟเคŸเฅเคธ เคฌเคขเคผเคพเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค, เค†เคชเค•เฅ‹ เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคตเฅ‡เคฐเคฟเคเคฌเคฒ `HF_TOKEN` เคธเฅ‡เคŸ เค•เคฐเคจเคพ เคนเฅ‹เค—เคพ เคฏเคพ `HfApiModel` เค•เฅ‡ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ `token` เคตเฅ‡เคฐเคฟเคเคฌเคฒ เคชเคพเคธ เค•เคฐเคจเคพ เคนเฅ‹เค—เคพเฅค +เค—เฅ‡เคŸเฅ‡เคก เคฎเฅ‰เคกเคฒเฅเคธ เคคเค• เคชเคนเฅเค‚เคšเคจเฅ‡ เคฏเคพ PRO เค…เค•เคพเค‰เค‚เคŸ เค•เฅ‡ เคธเคพเคฅ เค…เคชเคจเฅ€ เคฐเฅ‡เคŸ เคฒเคฟเคฎเคฟเคŸเฅเคธ เคฌเคขเคผเคพเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค, เค†เคชเค•เฅ‹ เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคตเฅ‡เคฐเคฟเคเคฌเคฒ `HF_TOKEN` เคธเฅ‡เคŸ เค•เคฐเคจเคพ เคนเฅ‹เค—เคพ เคฏเคพ `InferenceClientModel` เค•เฅ‡ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ `token` เคตเฅ‡เคฐเคฟเคเคฌเคฒ เคชเคพเคธ เค•เคฐเคจเคพ เคนเฅ‹เค—เคพเฅค ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -model = HfApiModel(model_id=model_id, token="") +model = InferenceClientModel(model_id=model_id, token="") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.run( @@ -114,7 +99,7 @@ agent.run( เค†เคช เค…เคชเคจเฅ‡ [`CodeAgent`] เค•เฅ‡ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ เค†เคฐเฅเค—เฅเคฏเฅ‚เคฎเฅ‡เค‚เคŸ `additional_authorized_imports` เคฎเฅ‡เค‚ เคธเฅเคŸเฅเคฐเคฟเค‚เค—เฅเคธ เค•เฅ€ เคฒเคฟเคธเฅเคŸ เค•เฅ‡ เคฐเฅ‚เคช เคฎเฅ‡เค‚ เค…เคคเคฟเคฐเคฟเค•เฅเคค เคฎเฅ‰เคกเฅเคฏเฅ‚เคฒเฅเคธ เค•เฅ‹ เค…เคงเคฟเค•เฅƒเคค เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค ```py -model = HfApiModel() +model = InferenceClientModel() agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 
'bs4']) agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") ``` @@ -124,7 +109,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เค•เคฟเคธเฅ€ เคญเฅ€ เค•เฅ‹เคก เคชเคฐ เคฐเฅเค• เคœเคพเคเค—เคพ เคœเฅ‹ เคเค• เค…เคตเฅˆเคง เค‘เคชเคฐเฅ‡เคถเคจ เค•เคฐเคจเฅ‡ เค•เคพ เคชเฅเคฐเคฏเคพเคธ เค•เคฐเคคเคพ เคนเฅˆ เคฏเคพ เคฏเคฆเคฟ เคเคœเฅ‡เค‚เคŸ เคฆเฅเคตเคพเคฐเคพ เคœเคจเคฐเฅ‡เคŸ เค•เคฟเค เค—เค เค•เฅ‹เคก เคฎเฅ‡เค‚ เคเค• เคฐเฅ‡เค—เฅเคฒเคฐ เคชเคพเคฏเคฅเคจ เคเคฐเคฐ เคนเฅˆเฅค -เค†เคช [E2B เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸเคฐ](https://e2b.dev/docs#what-is-e2-b) เค•เคพ เค‰เคชเคฏเฅ‹เค— เคฒเฅ‹เค•เคฒ เคชเคพเคฏเคฅเคจ เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เค•เฅ‡ เคฌเคœเคพเคฏ เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚, เคชเคนเคฒเฅ‡ [`E2B_API_KEY` เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคตเฅ‡เคฐเคฟเคเคฌเคฒ เคธเฅ‡เคŸ เค•เคฐเค•เฅ‡](https://e2b.dev/dashboard?tab=keys) เค”เคฐ เคซเคฟเคฐ เคเคœเฅ‡เค‚เคŸ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ `use_e2b_executor=True` เคชเคพเคธ เค•เคฐเค•เฅ‡เฅค +เค†เคช [E2B เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸเคฐ](https://e2b.dev/docs#what-is-e2-b) เคฏเคพ Docker เค•เคพ เค‰เคชเคฏเฅ‹เค— เคฒเฅ‹เค•เคฒ เคชเคพเคฏเคฅเคจ เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เค•เฅ‡ เคฌเคœเคพเคฏ เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค E2B เค•เฅ‡ เคฒเคฟเค, เคชเคนเคฒเฅ‡ [`E2B_API_KEY` เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคตเฅ‡เคฐเคฟเคเคฌเคฒ เคธเฅ‡เคŸ เค•เคฐเฅ‡เค‚](https://e2b.dev/dashboard?tab=keys) เค”เคฐ เคซเคฟเคฐ เคเคœเฅ‡เค‚เคŸ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ `executor_type="e2b"` เคชเคพเคธ เค•เคฐเฅ‡เค‚เฅค Docker เค•เฅ‡ เคฒเคฟเค, เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เค•เฅ‡ เคฆเฅŒเคฐเคพเคจ `executor_type="docker"` เคชเคพเคธ เค•เคฐเฅ‡เค‚เฅค > [!TIP] > เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เค•เฅ‡ เคฌเคพเคฐเฅ‡ เคฎเฅ‡เค‚ เค”เคฐ เคœเคพเคจเฅ‡เค‚ [เค‡เคธ เคŸเฅเคฏเฅ‚เคŸเฅ‹เคฐเคฟเคฏเคฒ เคฎเฅ‡เค‚](tutorials/secure_code_execution)เฅค @@ -158,7 +143,7 @@ agent.run("Could you get me the 
title of the page at url 'https://huggingface.co ### เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคŸเฅ‚เคฒเคฌเฅ‰เค•เฅเคธ -`smolagents` เคเคœเฅ‡เค‚เคŸเฅเคธ เค•เฅ‹ เคธเคถเค•เฅเคค เคฌเคจเคพเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เคเค• เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคŸเฅ‚เคฒเคฌเฅ‰เค•เฅเคธ เค•เฅ‡ เคธเคพเคฅ เค†เคคเคพ เคนเฅˆ, เคœเคฟเคธเฅ‡ เค†เคช เค†เคฐเฅเค—เฅเคฏเฅ‚เคฎเฅ‡เค‚เคŸ `add_base_tools = True` เค•เฅ‡ เคธเคพเคฅ เค…เคชเคจเฅ‡ เคเคœเฅ‡เค‚เคŸ เคฎเฅ‡เค‚ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ เคœเฅ‹เคกเคผ เคธเค•เคคเฅ‡ เคนเฅˆเค‚: +`smolagents` เคเคœเฅ‡เค‚เคŸเฅเคธ เค•เฅ‹ เคธเคถเค•เฅเคค เคฌเคจเคพเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เคเค• เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคŸเฅ‚เคฒเคฌเฅ‰เค•เฅเคธ เค•เฅ‡ เคธเคพเคฅ เค†เคคเคพ เคนเฅˆ, เคœเคฟเคธเฅ‡ เค†เคช เค†เคฐเฅเค—เฅเคฏเฅ‚เคฎเฅ‡เค‚เคŸ `add_base_tools=True` เค•เฅ‡ เคธเคพเคฅ เค…เคชเคจเฅ‡ เคเคœเฅ‡เค‚เคŸ เคฎเฅ‡เค‚ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เคชเคฐ เคœเฅ‹เคกเคผ เคธเค•เคคเฅ‡ เคนเฅˆเค‚: - **DuckDuckGo เคตเฅ‡เคฌ เคธเคฐเฅเคš**: DuckDuckGo เคฌเฅเคฐเคพเค‰เคœเคผเคฐ เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเค•เฅ‡ เคตเฅ‡เคฌ เคธเคฐเฅเคš เค•เคฐเคคเคพ เคนเฅˆเฅค - **เคชเคพเคฏเคฅเคจ เค•เฅ‹เคก เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ**: เค†เคชเค•เคพ LLM เคœเคจเคฐเฅ‡เคŸเฅ‡เคก เคชเคพเคฏเคฅเคจ เค•เฅ‹เคก เคเค• เคธเฅเคฐเค•เฅเคทเคฟเคค เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคฎเฅ‡เค‚ เคšเคฒเคพเคคเคพ เคนเฅˆเฅค เคฏเคน เคŸเฅ‚เคฒ [`ToolCallingAgent`] เคฎเฅ‡เค‚ เค•เฅ‡เคตเคฒ เคคเคญเฅ€ เคœเฅ‹เคกเคผเคพ เคœเคพเคเค—เคพ เคœเคฌ เค†เคช เค‡เคธเฅ‡ `add_base_tools=True` เค•เฅ‡ เคธเคพเคฅ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฐเคคเฅ‡ เคนเฅˆเค‚, เค•เฅเคฏเฅ‹เค‚เค•เคฟ เค•เฅ‹เคก-เคฌเฅ‡เคธเฅเคก เคเคœเฅ‡เค‚เคŸ เคชเคนเคฒเฅ‡ เคธเฅ‡ เคนเฅ€ เคจเฅ‡เคŸเคฟเคต เคฐเฅ‚เคช เคธเฅ‡ เคชเคพเคฏเคฅเคจ เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸ เค•เคฐ เคธเค•เคคเคพ เคนเฅˆ @@ -250,8 +235,8 @@ class ModelDownloadTool(Tool): เค†เคช เคธเฅ€เคงเฅ‡ เค…เคชเคจเฅ‡ เคเคœเฅ‡เค‚เคŸ เค•เฅ‹ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚: ```py -from smolagents import CodeAgent, HfApiModel -agent = CodeAgent(tools=[model_download_tool], 
model=HfApiModel()) +from smolagents import CodeAgent, InferenceClientModel +agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel()) agent.run( "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" ) @@ -264,7 +249,7 @@ agent.run( โ”‚ Can you give me the name of the model that has the most downloads in the 'text-to-video' โ”‚ โ”‚ task on the Hugging Face Hub? โ”‚ โ”‚ โ”‚ -โ•ฐโ”€ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ฐโ”€ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” Step 0 โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” โ•ญโ”€ Executing this code: โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ 1 model_name = model_download_tool(task="text-to-video") โ”‚ @@ -301,9 +286,9 @@ Microsoft เค•เฅ‡ เคซเฅเคฐเฅ‡เคฎเคตเคฐเฅเค• [Autogen](https://huggingface.co/pa เคฏเคนเคพเค‚ เคเค• เคเคœเฅ‡เค‚เคŸ เคฌเคจเคพเคจเฅ‡ เค•เคพ เค‰เคฆเคพเคนเคฐเคฃ เคฆเคฟเคฏเคพ เค—เคฏเคพ เคนเฅˆ เคœเฅ‹ เคนเคฎเคพเคฐเฅ‡ [`DuckDuckGoSearchTool`] เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฐเค•เฅ‡ เคเค• เคตเคฟเคถเคฟเคทเฅเคŸ เคตเฅ‡เคฌ เค–เฅ‹เคœ เคเคœเฅ‡เค‚เคŸ เค•เฅ‹ เคชเฅเคฐเคฌเค‚เคงเคฟเคค เค•เคฐเคคเคพ เคนเฅˆเฅค ```py -from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent +from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent 
-model = HfApiModel() +model = InferenceClientModel() web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) @@ -332,14 +317,14 @@ manager_agent.run("Who is the CEO of Hugging Face?") from smolagents import ( load_tool, CodeAgent, - HfApiModel, + InferenceClientModel, GradioUI ) # Import tool from Hub image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) # Initialize the agent with the image generation tool agent = CodeAgent(tools=[image_generation_tool], model=model) diff --git a/docs/source/hi/index.mdx b/docs/source/hi/index.mdx index 533b3b62d..40c938b55 100644 --- a/docs/source/hi/index.mdx +++ b/docs/source/hi/index.mdx @@ -1,18 +1,3 @@ - - # `smolagents`
diff --git a/docs/source/hi/reference/agents.mdx b/docs/source/hi/reference/agents.mdx index 2e070cf03..95e097560 100644 --- a/docs/source/hi/reference/agents.mdx +++ b/docs/source/hi/reference/agents.mdx @@ -1,18 +1,3 @@ - # Agents @@ -98,12 +83,12 @@ print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"])) [[autodoc]] TransformersModel -### HfApiModel +### InferenceClientModel -`HfApiModel` LLM เค•เฅ‡ เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เค•เฅ‡ เคฒเคฟเค [HF Inference API](https://huggingface.co/docs/api-inference/index) เค•เฅเคฒเคพเค‡เค‚เคŸ เค•เฅ‹ เคฐเฅˆเคช เค•เคฐเคคเคพ เคนเฅˆเฅค +`InferenceClientModel` LLM เค•เฅ‡ เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เค•เฅ‡ เคฒเคฟเค [HF Inference API](https://huggingface.co/docs/api-inference/index) เค•เฅเคฒเคพเค‡เค‚เคŸ เค•เฅ‹ เคฐเฅˆเคช เค•เคฐเคคเคพ เคนเฅˆเฅค ```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel messages = [ {"role": "user", "content": "Hello, how are you?"}, @@ -111,13 +96,13 @@ messages = [ {"role": "user", "content": "No need to help, take it easy."}, ] -model = HfApiModel() +model = InferenceClientModel() print(model(messages)) ``` ```text >>> Of course! If you change your mind, feel free to reach out. Take care! 
``` -[[autodoc]] HfApiModel +[[autodoc]] InferenceClientModel ### LiteLLMModel @@ -133,7 +118,7 @@ messages = [ {"role": "user", "content": "No need to help, take it easy."}, ] -model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) print(model(messages)) ``` diff --git a/docs/source/hi/reference/tools.mdx b/docs/source/hi/reference/tools.mdx index 6c270321e..d7e0de98c 100644 --- a/docs/source/hi/reference/tools.mdx +++ b/docs/source/hi/reference/tools.mdx @@ -1,18 +1,3 @@ - # Tools diff --git a/docs/source/hi/tutorials/building_good_agents.mdx b/docs/source/hi/tutorials/building_good_agents.mdx index 92587ef35..0baa206f6 100644 --- a/docs/source/hi/tutorials/building_good_agents.mdx +++ b/docs/source/hi/tutorials/building_good_agents.mdx @@ -1,18 +1,3 @@ - # เค…เคšเฅเค›เฅ‡ Agents เค•เคพ เคจเคฟเคฐเฅเคฎเคพเคฃ [[open-in-colab]] @@ -122,11 +107,11 @@ def get_weather_api(location: str, date_time: str) -> str: ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -agent = CodeAgent(tools=[], model=HfApiModel(model_id=model_id), add_base_tools=True) +agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True) agent.run( "Why does Mike not know many people in New York?", @@ -211,13 +196,152 @@ In the end you have to return a final answer using the `final_answer` tool. Here are a few examples using notional tools: --- -{examples} +Task: "Generate an image of the oldest person in this document." -Above example were using notional tools that might not exist for you. 
On top of performing computations in the Python code snippets that you create, you only have access to these tools: +Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer. +Code: +```py +answer = document_qa(document=document, question="Who is the oldest person mentioned?") +print(answer) +``` +Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland." -{{tool_descriptions}} +Thought: I will now generate an image showcasing the oldest person. +Code: +```py +image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.") +final_answer(image) +``` + +--- +Task: "What is the result of the following operation: 5 + 3 + 1294.678?" -{{managed_agents_descriptions}} +Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool +Code: +```py +result = 5 + 3 + 1294.678 +final_answer(result) +``` + +--- +Task: +"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French. +You have been provided with these additional arguments, that you can access using the keys as variables in your python code: +{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}" + +Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image. 
+Code: +```py +translated_question = translator(question=question, src_lang="French", tgt_lang="English") +print(f"The translated question is {translated_question}.") +answer = image_qa(image=image, question=translated_question) +final_answer(f"The answer is {answer}") +``` + +--- +Task: +In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer. +What does he say was the consequence of Einstein learning too much math on his creativity, in one word? + +Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein") +print(pages) +``` +Observation: +No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein". + +Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam") +print(pages) +``` +Observation: +Found 6 pages: +[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/) + +[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/) + +(truncated) + +Thought: I will read the first 2 pages to know more. +Code: +```py +for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]: + whole_page = visit_webpage(url) + print(whole_page) + print("\n" + "="*80 + "\n") # Print separator between pages +``` +Observation: +Manhattan Project Locations: +Los Alamos, NM +Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. 
In this interview, he discusses his work at +(truncated) + +Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word. +Code: +```py +final_answer("diminished") +``` + +--- +Task: "Which city has the highest population: Guangzhou or Shanghai?" + +Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities. +Code: +```py +for city in ["Guangzhou", "Shanghai"]: + print(f"Population {city}:", search(f"{city} population") +``` +Observation: +Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.'] +Population Shanghai: '26 million (2019)' + +Thought: Now I know that Shanghai has the highest population. +Code: +```py +final_answer("Shanghai") +``` + +--- +Task: "What is the current age of the pope, raised to the power 0.36?" + +Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search. +Code: +```py +pope_age_wiki = wiki(query="current pope age") +print("Pope age as per wikipedia:", pope_age_wiki) +pope_age_search = web_search(query="current pope age") +print("Pope age as per google search:", pope_age_search) +``` +Observation: +Pope age: "The pope Francis is currently 88 years old." + +Thought: I know that the pope is 88 years old. Let's compute the result using python code. +Code: +```py +pope_current_age = 88 ** 0.36 +final_answer(pope_current_age) +``` + +Above example were using notional tools that might not exist for you. 
On top of performing computations in the Python code snippets that you create, you only have access to these tools: +{%- for tool in tools.values() %} +- {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} +{%- endfor %} + +{%- if managed_agents and managed_agents.values() | list %} +You can also give tasks to team members. +Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. +Given that this team member is a real human, you should be very verbose in your task. +Here is a list of the team members that you can call: +{%- for agent in managed_agents.values() %} +- {{ agent.name }}: {{ agent.description }} +{%- endfor %} +{%- endif %} Here are the rules you should always follow to solve your task: 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```' sequence, else you will fail. @@ -226,7 +350,7 @@ Here are the rules you should always follow to solve your task: 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. -7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables. +7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables. 8. 
You can use imports in your code, but only from the following list of modules: {{authorized_imports}} 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. 10. Don't give up! You're in charge of solving the task, not providing directions to solve it. @@ -234,11 +358,29 @@ Here are the rules you should always follow to solve your task: Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. ``` -เคœเฅˆเคธเคพ เค•เคฟ เค†เคช เคฆเฅ‡เค– เคธเค•เคคเฅ‡ เคนเฅˆเค‚, `"{{tool_descriptions}}"` เคœเฅˆเคธเฅ‡ เคชเฅเคฒเฅ‡เคธเคนเฅ‹เคฒเฅเคกเคฐเฅเคธ เคนเฅˆเค‚: เค‡เคจเค•เคพ เค‰เคชเคฏเฅ‹เค— เคเคœเฅ‡เค‚เคŸ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เค•เฅ‡ เคธเคฎเคฏ เคŸเฅ‚เคฒเฅเคธ เคฏเคพ เคฎเฅˆเคจเฅ‡เคœเฅเคก เคเคœเฅ‡เค‚เคŸเฅเคธ เค•เฅ‡ เค•เฅเค› เคธเฅเคตเคšเคพเคฒเคฟเคค เคฐเฅ‚เคช เคธเฅ‡ เคœเคจเคฐเฅ‡เคŸ เค•เคฟเค เค—เค เคตเคฟเคตเคฐเคฃเฅ‹เค‚ เค•เฅ‹ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค•เคฟเคฏเคพ เคœเคพเคเค—เคพเฅค +เคœเฅˆเคธเคพ เค•เคฟ เค†เคช เคฆเฅ‡เค– เคธเค•เคคเฅ‡ เคนเฅˆเค‚, `"{{ tool.description }}"` เคœเฅˆเคธเฅ‡ เคชเฅเคฒเฅ‡เคธเคนเฅ‹เคฒเฅเคกเคฐเฅเคธ เคนเฅˆเค‚: เค‡เคจเค•เคพ เค‰เคชเคฏเฅ‹เค— เคเคœเฅ‡เค‚เคŸ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเฅ‡เคถเคจ เค•เฅ‡ เคธเคฎเคฏ เคŸเฅ‚เคฒเฅเคธ เคฏเคพ เคฎเฅˆเคจเฅ‡เคœเฅเคก เคเคœเฅ‡เค‚เคŸเฅเคธ เค•เฅ‡ เค•เฅเค› เคธเฅเคตเคšเคพเคฒเคฟเคค เคฐเฅ‚เคช เคธเฅ‡ เคœเคจเคฐเฅ‡เคŸ เค•เคฟเค เค—เค เคตเคฟเคตเคฐเคฃเฅ‹เค‚ เค•เฅ‹ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค•เคฟเคฏเคพ เคœเคพเคเค—เคพเฅค เค‡เคธเคฒเคฟเค เคœเคฌเค•เคฟ เค†เคช `system_prompt` เคชเฅˆเคฐเคพเคฎเฅ€เคŸเคฐ เคฎเฅ‡เค‚ เค…เคชเคจเฅ‡ เค•เคธเฅเคŸเคฎ เคชเฅเคฐเฅ‰เคฎเฅเคชเฅเคŸ เค•เฅ‹ เค†เคฐเฅเค—เฅเคฎเฅ‡เค‚เคŸ เค•เฅ‡ เคฐเฅ‚เคช เคฎเฅ‡เค‚ เคชเคพเคธ เค•เคฐเค•เฅ‡ เค‡เคธ เคธเคฟเคธเฅเคŸเคฎ เคชเฅเคฐเฅ‰เคฎเฅเคชเฅเคŸ เคŸเฅ‡เคฎเฅเคชเคฒเฅ‡เคŸ เค•เฅ‹ เค“เคตเคฐเคฐเคพเค‡เคŸ เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚, เค†เคชเค•เฅ‡ เคจเค เคธเคฟเคธเฅเคŸเคฎ เคชเฅเคฐเฅ‰เคฎเฅเคชเฅเคŸ เคฎเฅ‡เค‚ เคจเคฟเคฎเฅเคจเคฒเคฟเค–เคฟเคค 
เคชเฅเคฒเฅ‡เคธเคนเฅ‹เคฒเฅเคกเคฐเฅเคธ เคนเฅ‹เคจเฅ‡ เคšเคพเคนเคฟเค: -- เคŸเฅ‚เคฒ เคตเคฟเคตเคฐเคฃ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค `"{{tool_descriptions}}"`เฅค -- เคฏเคฆเคฟ เค•เฅ‹เคˆ เคฎเฅˆเคจเฅ‡เคœเฅเคก เคเคœเฅ‡เค‚เคŸเฅเคธ เคนเฅˆเค‚ เคคเฅ‹ เค‰เคจเค•เฅ‡ เคฒเคฟเค เคตเคฟเคตเคฐเคฃ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค `"{{managed_agents_description}}"`เฅค +- เคŸเฅ‚เคฒ เคตเคฟเคตเคฐเคฃ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเคเฅค + ``` + {%- for tool in tools.values() %} + - {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} + {%- endfor %} + ``` +- เคฏเคฆเคฟ เค•เฅ‹เคˆ เคฎเฅˆเคจเฅ‡เคœเฅเคก เคเคœเฅ‡เค‚เคŸเฅเคธ เคนเฅˆเค‚ เคคเฅ‹ เค‰เคจเค•เฅ‡ เคฒเคฟเค เคตเคฟเคตเคฐเคฃ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเคเฅค + ``` + {%- if managed_agents and managed_agents.values() | list %} + You can also give tasks to team members. + Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. + Given that this team member is a real human, you should be very verbose in your task. + Here is a list of the team members that you can call: + {%- for agent in managed_agents.values() %} + - {{ agent.name }}: {{ agent.description }} + {%- endfor %} + {%- endif %} + ``` - เค•เฅ‡เคตเคฒ `CodeAgent` เค•เฅ‡ เคฒเคฟเค: เค…เคงเคฟเค•เฅƒเคค เค‡เคฎเฅเคชเฅ‹เคฐเฅเคŸเฅเคธ เค•เฅ€ เคธเฅ‚เคšเฅ€ เคกเคพเคฒเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค `"{{authorized_imports}}"`เฅค เคซเคฟเคฐ เค†เคช เคธเคฟเคธเฅเคŸเคฎ เคชเฅเคฐเฅ‰เคฎเฅเคชเฅเคŸ เค•เฅ‹ เคจเคฟเคฎเฅเคจเคพเคจเฅเคธเคพเคฐ เคฌเคฆเคฒ เคธเค•เคคเฅ‡ เคนเฅˆเค‚: @@ -255,7 +397,7 @@ This also works with the [`ToolCallingAgent`]. 
เคนเคฎ เคชเฅ‚เคฐเค• เคฏเฅ‹เคœเคจเคพ เคšเคฐเคฃ เค•เฅ‡ เคฒเคฟเค เคเค• เคฎเฅ‰เคกเคฒ เคชเฅเคฐเคฆเคพเคจ เค•เคฐเคคเฅ‡ เคนเฅˆเค‚, เคœเคฟเคธเฅ‡ เคเคœเฅ‡เค‚เคŸ เคธเคพเคฎเคพเคจเฅเคฏ เค•เฅเคฐเคฟเคฏเคพเค“เค‚ เค•เฅ‡ เคšเคฐเคฃเฅ‹เค‚ เค•เฅ‡ เคฌเฅ€เคš เคจเคฟเคฏเคฎเคฟเคค เคฐเฅ‚เคช เคธเฅ‡ เคšเคฒเคพ เคธเค•เคคเคพ เคนเฅˆเฅค เค‡เคธ เคšเคฐเคฃ เคฎเฅ‡เค‚ เค•เฅ‹เคˆ เคŸเฅ‚เคฒ เค•เฅ‰เคฒ เคจเคนเฅ€เค‚ เคนเฅ‹เคคเฅ€ เคนเฅˆ, LLM เคธเฅ‡ เค•เฅ‡เคตเคฒ เค‰เคจ เคคเคฅเฅเคฏเฅ‹เค‚ เค•เฅ€ เคธเฅ‚เคšเฅ€ เค•เฅ‹ เค…เคชเคกเฅ‡เคŸ เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค•เคนเคพ เคœเคพเคคเคพ เคนเฅˆ เคœเฅ‹ เค‰เคธเฅ‡ เคœเฅเคžเคพเคค เคนเฅˆเค‚ เค”เคฐ เค‡เคจ เคคเคฅเฅเคฏเฅ‹เค‚ เค•เฅ‡ เค†เคงเคพเคฐ เคชเคฐ เค‰เคธเฅ‡ เค…เค—เคฒเฅ‡ เค•เคฆเคฎเฅ‹เค‚ เค•เฅ‡ เคฌเคพเคฐเฅ‡ เคฎเฅ‡เค‚ เคตเคฟเคšเคพเคฐ เค•เคฐเคจเคพ เคนเฅ‹เคคเคพ เคนเฅˆเฅค ```py -from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool from dotenv import load_dotenv load_dotenv() @@ -267,7 +409,7 @@ search_tool = DuckDuckGoSearchTool() agent = CodeAgent( tools=[search_tool], - model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"), planning_interval=3 # This is where you activate planning! 
) diff --git a/docs/source/hi/tutorials/inspect_runs.mdx b/docs/source/hi/tutorials/inspect_runs.mdx index 0669c4dcc..127bca148 100644 --- a/docs/source/hi/tutorials/inspect_runs.mdx +++ b/docs/source/hi/tutorials/inspect_runs.mdx @@ -1,18 +1,3 @@ - # OpenTelemetry เค•เฅ‡ เคธเคพเคฅ runs เค•เคพ เคจเคฟเคฐเฅ€เค•เฅเคทเคฃ [[open-in-colab]] @@ -73,10 +58,10 @@ from smolagents import ( ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, - HfApiModel, + InferenceClientModel, ) -model = HfApiModel() +model = InferenceClientModel() managed_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], diff --git a/docs/source/hi/tutorials/secure_code_execution.mdx b/docs/source/hi/tutorials/secure_code_execution.mdx index ad2cd8c34..73719e842 100644 --- a/docs/source/hi/tutorials/secure_code_execution.mdx +++ b/docs/source/hi/tutorials/secure_code_execution.mdx @@ -1,18 +1,3 @@ - # เคธเฅเคฐเค•เฅเคทเคฟเคค เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ [[open-in-colab]] @@ -41,7 +26,7 @@ rendered properly in your Markdown viewer. 
### เคฒเฅ‹เค•เคฒ เคชเคพเคฏเคฅเคจ เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคฐเฅ‚เคช เคธเฅ‡, `CodeAgent` LLM-เคœเคจเคฐเฅ‡เคŸเฅ‡เคก เค•เฅ‹เคก เค•เฅ‹ เค†เคชเค•เฅ‡ เคเคจเคตเคพเคฏเคฐเคจเคฎเฅ‡เค‚เคŸ เคฎเฅ‡เค‚ เคšเคฒเคพเคคเคพ เคนเฅˆเฅค -เคฏเคน เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เคตเฅˆเคจเคฟเคฒเคพ เคชเคพเคฏเคฅเคจ เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เคฆเฅเคตเคพเคฐเคพ เคจเคนเฅ€เค‚ เค•เคฟเคฏเคพ เคœเคพเคคเคพ: เคนเคฎเคจเฅ‡ เคเค• เค…เคงเคฟเค• เคธเฅเคฐเค•เฅเคทเคฟเคค `LocalPythonInterpreter` เค•เฅ‹ เคถเฅเคฐเฅ‚ เคธเฅ‡ เคซเคฟเคฐ เคธเฅ‡ เคฌเคจเคพเคฏเคพ เคนเฅˆเฅค +เคฏเคน เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคถเคจ เคตเฅˆเคจเคฟเคฒเคพ เคชเคพเคฏเคฅเคจ เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เคฆเฅเคตเคพเคฐเคพ เคจเคนเฅ€เค‚ เค•เคฟเคฏเคพ เคœเคพเคคเคพ: เคนเคฎเคจเฅ‡ เคเค• เค…เคงเคฟเค• เคธเฅเคฐเค•เฅเคทเคฟเคค `LocalPythonExecutor` เค•เฅ‹ เคถเฅเคฐเฅ‚ เคธเฅ‡ เคซเคฟเคฐ เคธเฅ‡ เคฌเคจเคพเคฏเคพ เคนเฅˆเฅค เคฏเคน เค‡เค‚เคŸเคฐเคชเฅเคฐเฅ‡เคŸเคฐ เคธเฅเคฐเค•เฅเคทเคพ เค•เฅ‡ เคฒเคฟเค เคกเคฟเคœเคผเคพเค‡เคจ เค•เคฟเคฏเคพ เค—เคฏเคพ เคนเฅˆ: - เค‡เคฎเฅเคชเฅ‹เคฐเฅเคŸเฅเคธ เค•เฅ‹ เค‰เคชเคฏเฅ‹เค—เค•เคฐเฅเคคเคพ เคฆเฅเคตเคพเคฐเคพ เคธเฅเคชเคทเฅเคŸ เคฐเฅ‚เคช เคธเฅ‡ เคชเคพเคธ เค•เฅ€ เค—เคˆ เคธเฅ‚เคšเฅ€ เคคเค• เคธเฅ€เคฎเคฟเคค เค•เคฐเคจเคพ - เค‡เคจเคซเคฟเคจเคฟเคŸ เคฒเฅ‚เคชเฅเคธ เค”เคฐ เคฐเคฟเคธเฅ‹เคฐเฅเคธ เคฌเฅเคฒเฅ‹เคŸเคฟเค‚เค— เค•เฅ‹ เคฐเฅ‹เค•เคจเฅ‡ เค•เฅ‡ เคฒเคฟเค เค‘เคชเคฐเฅ‡เคถเค‚เคธ เค•เฅ€ เคธเค‚เค–เฅเคฏเคพ เค•เฅ‹ เค•เฅˆเคช เค•เคฐเคจเคพ @@ -64,16 +49,16 @@ rendered properly in your Markdown viewer. เค…เคฌ เค†เคช เคคเฅˆเคฏเคพเคฐ เคนเฅˆเค‚! 
-เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸเคฐ เค•เฅ‹ E2B เคชเคฐ เคธเฅ‡เคŸ เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค, เคฌเคธ เค…เคชเคจเฅ‡ `CodeAgent` เค•เฅ‹ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฐเคคเฅ‡ เคธเคฎเคฏ `use_e2b_executor=True` เคซเฅเคฒเฅˆเค— เคชเคพเคธ เค•เคฐเฅ‡เค‚เฅค +เค•เฅ‹เคก เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸเคฐ เค•เฅ‹ E2B เคชเคฐ เคธเฅ‡เคŸ เค•เคฐเคจเฅ‡ เค•เฅ‡ เคฒเคฟเค, เคฌเคธ เค…เคชเคจเฅ‡ `CodeAgent` เค•เฅ‹ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฐเคคเฅ‡ เคธเคฎเคฏ `executor_type="e2b"` เคซเฅเคฒเฅˆเค— เคชเคพเคธ เค•เคฐเฅ‡เค‚เฅค เคงเฅเคฏเคพเคจ เคฆเฅ‡เค‚ เค•เคฟ เค†เคชเค•เฅ‹ `additional_authorized_imports` เคฎเฅ‡เค‚ เคธเคญเฅ€ เคŸเฅ‚เคฒ เค•เฅ€ เคกเคฟเคชเฅ‡เค‚เคกเฅ‡เค‚เคธเฅ€เคœเคผ เคœเฅ‹เคกเคผเคจเฅ€ เคšเคพเคนเคฟเค, เคคเคพเค•เคฟ เคเค•เฅเคœเฅ€เค•เฅเคฏเฅ‚เคŸเคฐ เค‰เคจเฅเคนเฅ‡เค‚ เค‡เค‚เคธเฅเคŸเฅ‰เคฒ เค•เคฐเฅ‡เฅค ```py -from smolagents import CodeAgent, VisitWebpageTool, HfApiModel +from smolagents import CodeAgent, VisitWebpageTool, InferenceClientModel agent = CodeAgent( tools = [VisitWebpageTool()], - model=HfApiModel(), + model=InferenceClientModel(), additional_authorized_imports=["requests", "markdownify"], - use_e2b_executor=True + executor_type="e2b" ) agent.run("What was Abraham Lincoln's preferred pet?") diff --git a/docs/source/hi/tutorials/tools.mdx b/docs/source/hi/tutorials/tools.mdx index bb56d7bfc..2695217d2 100644 --- a/docs/source/hi/tutorials/tools.mdx +++ b/docs/source/hi/tutorials/tools.mdx @@ -1,18 +1,3 @@ - # Tools [[open-in-colab]] @@ -134,9 +119,9 @@ image_generation_tool("A sunny beach") เคซเคฟเคฐ เค†เคช เค‡เคธ เคŸเฅ‚เคฒ เค•เคพ เค‰เคชเคฏเฅ‹เค— เค•เคฟเคธเฅ€ เค…เคจเฅเคฏ เคŸเฅ‚เคฒ เค•เฅ€ เคคเคฐเคน เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค เค‰เคฆเคพเคนเคฐเคฃ เค•เฅ‡ เคฒเคฟเค, เคšเคฒเคฟเค เคชเฅเคฐเฅ‰เคฎเฅเคชเฅเคŸ `a rabbit wearing a space suit` เค•เฅ‹ เคธเฅเคงเคพเคฐเฅ‡เค‚ เค”เคฐ เค‡เคธเค•เฅ€ เคเค• เค‡เคฎเฅ‡เคœ เคœเคจเคฐเฅ‡เคŸ เค•เคฐเฅ‡เค‚เฅค เคฏเคน เค‰เคฆเคพเคนเคฐเคฃ เคฏเคน เคญเฅ€ เคฆเคฟเค–เคพเคคเคพ เคนเฅˆ เค•เคฟ เค†เคช เคเคœเฅ‡เค‚เคŸ เค•เฅ‹ 
เค…เคคเคฟเคฐเคฟเค•เฅเคค เค†เคฐเฅเค—เฅเคฏเฅ‚เคฎเฅ‡เค‚เคŸเฅเคธ เค•เฅˆเคธเฅ‡ เคชเคพเคธ เค•เคฐ เคธเค•เคคเฅ‡ เคนเฅˆเค‚เฅค ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel -model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[image_generation_tool], model=model) agent.run( @@ -182,9 +167,9 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode เคšเคฒเคฟเค เค•เฅ‡เคตเคฒ เคกเคฟเคซเคผเฅ‰เคฒเฅเคŸ เคŸเฅ‚เคฒเคฌเฅ‰เค•เฅเคธ เค•เฅ‡ เคธเคพเคฅ เค‡เคจเคฟเคถเคฟเคฏเคฒเคพเค‡เคœเคผ เค•เคฟเค เค—เค เคฎเฅŒเคœเฅ‚เคฆเคพ เคเคœเฅ‡เค‚เคŸ เคฎเฅ‡เค‚ `model_download_tool` เคœเฅ‹เคกเคผเฅ‡เค‚เฅค ```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel -model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.tools[model_download_tool.name] = model_download_tool @@ -241,7 +226,7 @@ server_parameters = StdioServerParameters( env={"UV_PYTHON": "3.12", **os.environ}, ) -with ToolCollection.from_mcp(server_parameters) as tool_collection: +with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection: agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True) agent.run("Please find a remedy for hangover.") ``` \ No newline at end of file diff --git a/docs/source/zh/_toctree.yml b/docs/source/zh/_toctree.yml index 4da8f4859..5ebe325c9 100644 --- a/docs/source/zh/_toctree.yml +++ b/docs/source/zh/_toctree.yml @@ -8,10 +8,14 @@ sections: - local: tutorials/building_good_agents title: โœจ ๆž„ๅปบๅฅฝ็”จ็š„ agents + - local: tutorials/inspect_runs + title: ๐Ÿ“Š ็›‘ๆŽง Agent ็š„่ฟ่กŒ - local: tutorials/tools title: ๐Ÿ› ๏ธ ๅทฅๅ…ท - ๆทฑๅบฆๆŒ‡ๅ— - local: tutorials/secure_code_execution title: ๐Ÿ›ก๏ธ ไฝฟ็”จ 
E2B ไฟๆŠคไฝ ็š„ไปฃ็ ๆ‰ง่กŒ + - local: tutorials/memory + title: ๐Ÿ“š ็ฎก็† Agent ็š„่ฎฐๅฟ† - title: Conceptual guides sections: - local: conceptual_guides/intro_agents @@ -21,14 +25,18 @@ - title: Examples sections: - local: examples/text_to_sql - title: Self-correcting Text-to-SQL + title: ่‡ชๆˆ‘ไฟฎๆญฃ Text-to-SQL - local: examples/rag - title: Master you knowledge base with agentic RAG + title: ๅ€ŸๅŠฉ agentic RAG ๆŽŒๆŽง็Ÿฅ่ฏ†ๅบ“ - local: examples/multiagents - title: Orchestrate a multi-agent system + title: ็ผ–ๆŽ’ multi-agent ็ณป็ปŸ + - local: examples/web_browser + title: ๅŸบไบŽ่ง†่ง‰ๆจกๅž‹ๆž„ๅปบ่ƒฝๅคŸๆต่งˆ็ฝ‘้กต็š„agent - title: Reference sections: - local: reference/agents title: Agent-related objects + - local: reference/models + title: Model-related objects - local: reference/tools title: Tool-related objects diff --git a/docs/source/zh/conceptual_guides/intro_agents.mdx b/docs/source/zh/conceptual_guides/intro_agents.mdx index 416aabcb5..6b09349e4 100644 --- a/docs/source/zh/conceptual_guides/intro_agents.mdx +++ b/docs/source/zh/conceptual_guides/intro_agents.mdx @@ -1,19 +1,3 @@ - - # Agent ็ฎ€ไป‹ > [!TIP] diff --git a/docs/source/zh/conceptual_guides/react.mdx b/docs/source/zh/conceptual_guides/react.mdx index cdb970728..44760fb0c 100644 --- a/docs/source/zh/conceptual_guides/react.mdx +++ b/docs/source/zh/conceptual_guides/react.mdx @@ -1,18 +1,3 @@ - # ๅคšๆญฅ้ชค agent ๆ˜ฏๅฆ‚ไฝ•ๅทฅไฝœ็š„๏ผŸ ReAct ๆก†ๆžถ๏ผˆ[Yao et al., 2022](https://huggingface.co/papers/2210.03629)๏ผ‰ๆ˜ฏ็›ฎๅ‰ๆž„ๅปบ agent ็š„ไธป่ฆๆ–นๆณ•ใ€‚ diff --git a/docs/source/zh/examples/multiagents.mdx b/docs/source/zh/examples/multiagents.mdx index 3b177d133..567e7573f 100644 --- a/docs/source/zh/examples/multiagents.mdx +++ b/docs/source/zh/examples/multiagents.mdx @@ -1,18 +1,3 @@ - # ็ผ–ๆŽ’ multi-agent ็ณป็ปŸ ๐Ÿค–๐Ÿค๐Ÿค– [[open-in-colab]] @@ -53,7 +38,7 @@ login() ``` โšก๏ธ HF็š„Inference API ๅฏไปฅๅฟซ้€Ÿ่ฝปๆพๅœฐ่ฟ่กŒไปปไฝ•ๅผ€ๆบๆจกๅž‹๏ผŒๅ› ๆญคๆˆ‘ไปฌ็š„agentๅฐ†ไฝฟ็”จHF็š„Inference 
API -ไธญ็š„`HfApiModel`็ฑปๆฅ่ฐƒ็”จ +ไธญ็š„`InferenceClientModel`็ฑปๆฅ่ฐƒ็”จ [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)ๆจกๅž‹ใ€‚ _Note:_ ๅŸบไบŽๅคšๅ‚ๆ•ฐๅ’Œ้ƒจ็ฝฒๆจกๅž‹็š„ Inference API ๅฏ่ƒฝๅœจๆฒกๆœ‰้ข„ๅ…ˆ้€š็Ÿฅ็š„ๆƒ…ๅ†ตไธ‹ๆ›ดๆ–ฐๆˆ–ๆ›ฟๆขๆจกๅž‹ใ€‚ไบ†่งฃๆ›ดๅคšไฟกๆฏ๏ผŒ่ฏทๅ‚้˜…[่ฟ™้‡Œ](https://huggingface.co/docs/api-inference/supported-models)ใ€‚ @@ -127,13 +112,13 @@ print(visit_webpage("https://en.wikipedia.org/wiki/Hugging_Face")[:500]) from smolagents import ( CodeAgent, ToolCallingAgent, - HfApiModel, + InferenceClientModel, ManagedAgent, DuckDuckGoSearchTool, LiteLLMModel, ) -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), visit_webpage], diff --git a/docs/source/zh/examples/rag.mdx b/docs/source/zh/examples/rag.mdx index 23efa9e0e..bed9b7fb6 100644 --- a/docs/source/zh/examples/rag.mdx +++ b/docs/source/zh/examples/rag.mdx @@ -1,18 +1,3 @@ - # Agentic RAG [[open-in-colab]] @@ -38,7 +23,7 @@ Retrieval-Augmented-Generation (RAG) ๆ˜ฏโ€œไฝฟ็”จๅคง่ฏญ่จ€ๆจกๅž‹๏ผˆLLM๏ผ‰ๆฅๅ›ž !pip install smolagents pandas langchain langchain-community sentence-transformers rank_bm25 --upgrade -q ``` -ไฝ ้œ€่ฆไธ€ไธชๆœ‰ๆ•ˆ็š„ token ไฝœไธบ็Žฏๅขƒๅ˜้‡ `HF_TOKEN` ๆฅ่ฐƒ็”จ HF Inference APIใ€‚ๆˆ‘ไปฌไฝฟ็”จ python-dotenv ๆฅๅŠ ่ฝฝๅฎƒใ€‚ +ไฝ ้œ€่ฆไธ€ไธชๆœ‰ๆ•ˆ็š„ token ไฝœไธบ็Žฏๅขƒๅ˜้‡ `HF_TOKEN` ๆฅ่ฐƒ็”จ Inference Providersใ€‚ๆˆ‘ไปฌไฝฟ็”จ python-dotenv ๆฅๅŠ ่ฝฝๅฎƒใ€‚ ```py from dotenv import load_dotenv load_dotenv() @@ -126,10 +111,10 @@ BM25 ๆฃ€็ดขๆ–นๆณ•ๆ˜ฏไธ€ไธช็ปๅ…ธ็š„ๆฃ€็ดขๆ–นๆณ•๏ผŒๅ› ไธบๅฎƒ็š„่ฎพ็ฝฎ้€Ÿๅบฆ้žๅธธ _Note:_ ๆญค Inference API ๆ‰˜็ฎกๅŸบไบŽๅ„็งๆ ‡ๅ‡†็š„ๆจกๅž‹๏ผŒ้ƒจ็ฝฒ็š„ๆจกๅž‹ๅฏ่ƒฝไผšๅœจๆฒกๆœ‰ไบ‹ๅ…ˆ้€š็Ÿฅ็š„ๆƒ…ๅ†ตไธ‹่ฟ›่กŒๆ›ดๆ–ฐๆˆ–ๆ›ฟๆขใ€‚ไบ†่งฃๆ›ดๅคšไฟกๆฏ๏ผŒ่ฏท็‚นๅ‡ป[่ฟ™้‡Œ](https://huggingface.co/docs/api-inference/supported-models)ใ€‚ ```py -from smolagents import HfApiModel, CodeAgent +from smolagents import 
InferenceClientModel, CodeAgent agent = CodeAgent( - tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True + tools=[retriever_tool], model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbose=True ) ``` diff --git a/docs/source/zh/examples/text_to_sql.mdx b/docs/source/zh/examples/text_to_sql.mdx index 419c45159..349d31f6f 100644 --- a/docs/source/zh/examples/text_to_sql.mdx +++ b/docs/source/zh/examples/text_to_sql.mdx @@ -1,18 +1,3 @@ - # Text-to-SQL [[open-in-colab]] @@ -121,14 +106,14 @@ def sql_engine(query: str) -> str: ๆˆ‘ไปฌ็Žฐๅœจไฝฟ็”จ่ฟ™ไธชๅทฅๅ…ทๆฅๅˆ›ๅปบไธ€ไธช agentใ€‚ๆˆ‘ไปฌไฝฟ็”จ `CodeAgent`๏ผŒ่ฟ™ๆ˜ฏ smolagent ็š„ไธป่ฆ agent ็ฑป๏ผšไธ€ไธชๅœจไปฃ็ ไธญ็ผ–ๅ†™ๆ“ไฝœๅนถๆ นๆฎ ReAct ๆก†ๆžถ่ฟญไปฃๅ…ˆๅ‰่พ“ๅ‡บ็š„ agentใ€‚ -่ฟ™ไธชๆจกๅž‹ๆ˜ฏ้ฉฑๅŠจ agent ็ณป็ปŸ็š„ LLMใ€‚`HfApiModel` ๅ…่ฎธไฝ ไฝฟ็”จ HF Inference API ่ฐƒ็”จ LLM๏ผŒๆ— ่ฎบๆ˜ฏ้€š่ฟ‡ Serverless ่ฟ˜ๆ˜ฏ Dedicated endpoint๏ผŒไฝ†ไฝ ไนŸๅฏไปฅไฝฟ็”จไปปไฝ•ไธ“ๆœ‰ APIใ€‚ +่ฟ™ไธชๆจกๅž‹ๆ˜ฏ้ฉฑๅŠจ agent ็ณป็ปŸ็š„ LLMใ€‚`InferenceClientModel` ๅ…่ฎธไฝ ไฝฟ็”จ HF Inference API ่ฐƒ็”จ LLM๏ผŒๆ— ่ฎบๆ˜ฏ้€š่ฟ‡ Serverless ่ฟ˜ๆ˜ฏ Dedicated endpoint๏ผŒไฝ†ไฝ ไนŸๅฏไปฅไฝฟ็”จไปปไฝ•ไธ“ๆœ‰ APIใ€‚ ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"), + model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), ) agent.run("Can you give me the name of the client who got the most expensive receipt?") ``` @@ -184,7 +169,7 @@ sql_engine.description = updated_description agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"), ) agent.run("Which waiter got more total money from tips?") diff --git a/docs/source/zh/examples/web_browser.mdx 
b/docs/source/zh/examples/web_browser.mdx new file mode 100644 index 000000000..cf65225ed --- /dev/null +++ b/docs/source/zh/examples/web_browser.mdx @@ -0,0 +1,214 @@ +# ไฝฟ็”จAgentๅฎž็Žฐ็ฝ‘้กตๆต่งˆๅ™จ่‡ชๅŠจๅŒ– ๐Ÿค–๐ŸŒ + +[[open-in-colab]] + +ๅœจๆœฌnotebookไธญ๏ผŒๆˆ‘ไปฌๅฐ†ๅˆ›ๅปบไธ€ไธช**ๅŸบไบŽAgent็š„็ฝ‘้กตๆต่งˆๅ™จ่‡ชๅŠจๅŒ–็ณป็ปŸ**๏ผ่ฏฅ็ณป็ปŸๅฏไปฅ่‡ชๅŠจๅฏผ่ˆช็ฝ‘็ซ™ใ€ไธŽ็ฝ‘้กตๅ…ƒ็ด ไบคไบ’ๅนถๆๅ–ไฟกๆฏใ€‚ + +่ฏฅAgentๅฐ†่ƒฝๅคŸ๏ผš + +- [x] ๅฏผ่ˆชๅˆฐ็ฝ‘้กต +- [x] ็‚นๅ‡ปๅ…ƒ็ด  +- [x] ๅœจ้กต้ขๅ†…ๆœ็ดข +- [x] ๅค„็†ๅผนๅ‡บ็ช—ๅฃๅ’Œๆจกๆ€ๆก† +- [x] ๆๅ–ไฟกๆฏ + +่ฎฉๆˆ‘ไปฌไธ€ๆญฅๆญฅๆญๅปบ่ฟ™ไธช็ณป็ปŸ๏ผ + +้ฆ–ๅ…ˆ่ฟ่กŒไปฅไธ‹ๅ‘ฝไปคๅฎ‰่ฃ…ๆ‰€้œ€ไพ่ต–๏ผš + +```bash +pip install smolagents selenium helium pillow -q +``` + +่ฎฉๆˆ‘ไปฌๅฏผๅ…ฅๆ‰€้œ€็š„ๅบ“ๅนถ่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡๏ผš + +```python +from io import BytesIO +from time import sleep + +import helium +from dotenv import load_dotenv +from PIL import Image +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys + +from smolagents import CodeAgent, tool +from smolagents.agents import ActionStep + +# Load environment variables +load_dotenv() +``` + +็Žฐๅœจๆˆ‘ไปฌๆฅๅˆ›ๅปบๆ ธๅฟƒ็š„ๆต่งˆๅ™จไบคไบ’ๅทฅๅ…ท๏ผŒไฝฟๆˆ‘ไปฌ็š„Agent่ƒฝๅคŸๅฏผ่ˆชๅนถไธŽ็ฝ‘้กตไบคไบ’๏ผš + +```python +@tool +def search_item_ctrl_f(text: str, nth_result: int = 1) -> str: + """ + Searches for text on the current page via Ctrl + F and jumps to the nth occurrence. + Args: + text: The text to search for + nth_result: Which occurrence to jump to (default: 1) + """ + elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]") + if nth_result > len(elements): + raise Exception(f"Match nยฐ{nth_result} not found (only {len(elements)} matches found)") + result = f"Found {len(elements)} matches for '{text}'." 
+ elem = elements[nth_result - 1] + driver.execute_script("arguments[0].scrollIntoView(true);", elem) + result += f"Focused on element {nth_result} of {len(elements)}" + return result + +@tool +def go_back() -> None: + """Goes back to previous page.""" + driver.back() + +@tool +def close_popups() -> str: + """ + Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! + This does not work on cookie consent banners. + """ + webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform() +``` + +่ฎฉๆˆ‘ไปฌ้…็ฝฎไฝฟ็”จChromeๆต่งˆๅ™จๅนถ่ฎพ็ฝฎๆˆชๅ›พๅŠŸ่ƒฝ๏ผš + +```python +# Configure Chrome options +chrome_options = webdriver.ChromeOptions() +chrome_options.add_argument("--force-device-scale-factor=1") +chrome_options.add_argument("--window-size=1000,1350") +chrome_options.add_argument("--disable-pdf-viewer") +chrome_options.add_argument("--window-position=0,0") + +# Initialize the browser +driver = helium.start_chrome(headless=False, options=chrome_options) + +# Set up screenshot callback +def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: + sleep(1.0) # Let JavaScript animations happen before taking the screenshot + driver = helium.get_driver() + current_step = memory_step.step_number + if driver is not None: + for previous_memory_step in agent.memory.steps: # Remove previous screenshots for lean processing + if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2: + previous_memory_step.observations_images = None + png_bytes = driver.get_screenshot_as_png() + image = Image.open(BytesIO(png_bytes)) + print(f"Captured a browser screenshot: {image.size} pixels") + memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists + + # Update observations with current URL + url_info = f"Current url: {driver.current_url}" + memory_step.observations = ( + url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info + ) +``` + 
+็Žฐๅœจๆˆ‘ไปฌๆฅๅˆ›ๅปบ็ฝ‘้กต่‡ชๅŠจๅŒ–Agent๏ผš + +```python +from smolagents import InferenceClientModel + +# Initialize the model +model_id = "meta-llama/Llama-3.3-70B-Instruct" # You can change this to your preferred model +model = InferenceClientModel(model_id=model_id) + +# Create the agent +agent = CodeAgent( + tools=[go_back, close_popups, search_item_ctrl_f], + model=model, + additional_authorized_imports=["helium"], + step_callbacks=[save_screenshot], + max_steps=20, + verbosity_level=2, +) + +# Import helium for the agent +agent.python_executor("from helium import *", agent.state) +``` + +Agent้œ€่ฆ่Žทๅพ—ๅ…ณไบŽๅฆ‚ไฝ•ไฝฟ็”จHelium่ฟ›่กŒ็ฝ‘้กต่‡ชๅŠจๅŒ–็š„ๆŒ‡ๅฏผใ€‚ไปฅไธ‹ๆ˜ฏๆˆ‘ไปฌๅฐ†ๆไพ›็š„ๆ“ไฝœ่ฏดๆ˜Ž๏ผš + +```python +helium_instructions = """ +You can use helium to access websites. Don't bother about the helium driver, it's already managed. +We've already ran "from helium import *" +Then you can go to pages! +Code: +```py +go_to('github.com/trending') +``` + +You can directly click clickable elements by inputting the text that appears on them. +Code: +```py +click("Top products") +``` + +If it's a link: +Code: +```py +click(Link("Top products")) +``` + +If you try to interact with an element and it's not found, you'll get a LookupError. +In general stop your action after each button click to see what happens on your screenshot. +Never try to login in a page. + +To scroll up or down, use scroll_down or scroll_up with as an argument the number of pixels to scroll from. +Code: +```py +scroll_down(num_pixels=1200) # This will scroll one viewport down +``` + +When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails). +Just use your built-in tool `close_popups` to close them: +Code: +```py +close_popups() +``` + +You can use .exists() to check for the existence of an element. 
For example: +Code: +```py +if Text('Accept cookies?').exists(): + click('I accept') +``` +""" +``` + +็Žฐๅœจๆˆ‘ไปฌๅฏไปฅ่ฟ่กŒAgentๆ‰ง่กŒไปปๅŠกไบ†๏ผ่ฎฉๆˆ‘ไปฌๅฐ่ฏ•ๅœจ็ปดๅŸบ็™พ็ง‘ไธŠๆŸฅๆ‰พไฟกๆฏ๏ผš + +```python +search_request = """ +Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident. +""" + +agent_output = agent.run(search_request + helium_instructions) +print("Final output:") +print(agent_output) +``` + +ๆ‚จๅฏไปฅ้€š่ฟ‡ไฟฎๆ”น่ฏทๆฑ‚ๅ‚ๆ•ฐๆ‰ง่กŒไธๅŒไปปๅŠกใ€‚ไพ‹ๅฆ‚๏ผŒไปฅไธ‹่ฏทๆฑ‚ๅฏๅธฎๅŠฉๆˆ‘ๅˆคๆ–ญๆ˜ฏๅฆ้œ€่ฆๆ›ดๅŠ ๅŠชๅŠ›ๅทฅไฝœ๏ผš + +```python +github_request = """ +I'm trying to find how hard I have to work to get a repo in github.com/trending. +Can you navigate to the profile for the top author of the top trending repo, and give me their total number of commits over the last year? +""" + +agent_output = agent.run(github_request + helium_instructions) +print("Final output:") +print(agent_output) +``` + +่ฏฅ็ณป็ปŸๅœจไปฅไธ‹ไปปๅŠกไธญๅฐคไธบๆœ‰ๆ•ˆ๏ผš + +- ไปŽ็ฝ‘็ซ™ๆๅ–ๆ•ฐๆฎ +- ็ฝ‘้กต็ ”็ฉถ่‡ชๅŠจๅŒ– +- ็”จๆˆท็•Œ้ขๆต‹่ฏ•ไธŽ้ชŒ่ฏ +- ๅ†…ๅฎน็›‘ๆŽง \ No newline at end of file diff --git a/docs/source/zh/guided_tour.mdx b/docs/source/zh/guided_tour.mdx index 54ae10419..e851b79b8 100644 --- a/docs/source/zh/guided_tour.mdx +++ b/docs/source/zh/guided_tour.mdx @@ -1,18 +1,3 @@ - # Agents - ๅฏผ่งˆ [[open-in-colab]] @@ -31,26 +16,28 @@ rendered properly in your Markdown viewer. 
- `model`๏ผŒไธ€ไธชไธบๆ‚จ็š„ agent ๆไพ›ๅŠจๅŠ›็š„ๆ–‡ๆœฌ็”Ÿๆˆๆจกๅž‹ - ๅ› ไธบ agent ไธŽ็ฎ€ๅ•็š„ LLM ไธๅŒ๏ผŒๅฎƒๆ˜ฏไธ€ไธชไฝฟ็”จ LLM ไฝœไธบๅผ•ๆ“Ž็š„็ณป็ปŸใ€‚ๆ‚จๅฏไปฅไฝฟ็”จไปฅไธ‹ไปปไธ€้€‰้กน๏ผš - [`TransformersModel`] ไฝฟ็”จ้ข„ๅˆๅง‹ๅŒ–็š„ `transformers` ็ฎก้“ๅœจๆœฌๅœฐๆœบๅ™จไธŠ่ฟ่กŒๆŽจ็† - - [`HfApiModel`] ๅœจๅบ•ๅฑ‚ไฝฟ็”จ `huggingface_hub.InferenceClient` + - [`InferenceClientModel`] ๅœจๅบ•ๅฑ‚ไฝฟ็”จ `huggingface_hub.InferenceClient` - [`LiteLLMModel`] ่ฎฉๆ‚จ้€š่ฟ‡ [LiteLLM](https://docs.litellm.ai/) ่ฐƒ็”จ 100+ ไธๅŒ็š„ๆจกๅž‹๏ผ + - [`AzureOpenAIServerModel`] ๅ…่ฎธๆ‚จไฝฟ็”จ้ƒจ็ฝฒๅœจ [Azure](https://azure.microsoft.com/en-us/products/ai-services/openai-service) ไธญ็š„ OpenAI ๆจกๅž‹ใ€‚ + - [`MLXModel`] ๅฏๅˆ›ๅปบ [mlx-lm](https://pypi.org/project/mlx-lm/) ๆตๆฐด็บฟ๏ผŒไปฅไพฟๅœจๆœฌๅœฐๆœบๅ™จไธŠ่ฟ่กŒๆŽจ็†ใ€‚ - `tools`๏ผŒagent ๅฏไปฅ็”จๆฅ่งฃๅ†ณไปปๅŠก็š„ `Tools` ๅˆ—่กจใ€‚ๅฎƒๅฏไปฅๆ˜ฏไธ€ไธช็ฉบๅˆ—่กจใ€‚ๆ‚จ่ฟ˜ๅฏไปฅ้€š่ฟ‡ๅฎšไน‰ๅฏ้€‰ๅ‚ๆ•ฐ `add_base_tools=True` ๅœจๆ‚จ็š„ `tools` ๅˆ—่กจไน‹ไธŠๆทปๅŠ ้ป˜่ฎคๅทฅๅ…ท็ฎฑใ€‚ -ไธ€ๆ—ฆๆœ‰ไบ†่ฟ™ไธคไธชๅ‚ๆ•ฐ `tools` ๅ’Œ `model`๏ผŒๆ‚จๅฐฑๅฏไปฅๅˆ›ๅปบไธ€ไธช agent ๅนถ่ฟ่กŒๅฎƒใ€‚ๆ‚จๅฏไปฅไฝฟ็”จไปปไฝ•ๆ‚จๅ–œๆฌข็š„ LLM๏ผŒๆ— ่ฎบๆ˜ฏ้€š่ฟ‡ [Hugging Face API](https://huggingface.co/docs/api-inference/en/index)ใ€[transformers](https://github.com/huggingface/transformers/)ใ€[ollama](https://ollama.com/)๏ผŒ่ฟ˜ๆ˜ฏ [LiteLLM](https://www.litellm.ai/)ใ€‚ +ไธ€ๆ—ฆๆœ‰ไบ†่ฟ™ไธคไธชๅ‚ๆ•ฐ `tools` ๅ’Œ `model`๏ผŒๆ‚จๅฐฑๅฏไปฅๅˆ›ๅปบไธ€ไธช agent ๅนถ่ฟ่กŒๅฎƒใ€‚ๆ‚จๅฏไปฅไฝฟ็”จไปปไฝ•ๆ‚จๅ–œๆฌข็š„ LLM๏ผŒๆ— ่ฎบๆ˜ฏ้€š่ฟ‡ [Hugging Face API](https://huggingface.co/docs/api-inference/en/index)ใ€[transformers](https://github.com/huggingface/transformers/)ใ€[ollama](https://ollama.com/)ใ€[LiteLLM](https://www.litellm.ai/)ใ€[Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)๏ผŒ่ฟ˜ๆ˜ฏ[mlx-lm](https://pypi.org/project/mlx-lm/).ใ€‚ Hugging Face API ๅฏไปฅๅ…่ดนไฝฟ็”จ่€Œๆ— ้œ€ token๏ผŒไฝ†ไผšๆœ‰้€Ÿ็އ้™ๅˆถใ€‚ -่ฆ่ฎฟ้—ฎๅ—้™ๆจกๅž‹ๆˆ–ไฝฟ็”จ 
PRO ่ดฆๆˆทๆ้ซ˜้€Ÿ็އ้™ๅˆถ๏ผŒๆ‚จ้œ€่ฆ่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ `HF_TOKEN` ๆˆ–ๅœจๅˆๅง‹ๅŒ– `HfApiModel` ๆ—ถไผ ้€’ `token` ๅ˜้‡ใ€‚ +่ฆ่ฎฟ้—ฎๅ—้™ๆจกๅž‹ๆˆ–ไฝฟ็”จ PRO ่ดฆๆˆทๆ้ซ˜้€Ÿ็އ้™ๅˆถ๏ผŒๆ‚จ้œ€่ฆ่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ `HF_TOKEN` ๆˆ–ๅœจๅˆๅง‹ๅŒ– `InferenceClientModel` ๆ—ถไผ ้€’ `token` ๅ˜้‡ใ€‚ ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -model = HfApiModel(model_id=model_id, token="") +model = InferenceClientModel(model_id=model_id, token="") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.run( @@ -109,6 +96,62 @@ agent.run( "Could you give me the 118th number in the Fibonacci sequence?", ) ``` + + + +่ฆ่ฟžๆŽฅๅˆฐ Azure OpenAI๏ผŒๆ‚จๅฏไปฅ็›ดๆŽฅไฝฟ็”จ `AzureOpenAIServerModel`๏ผŒๆˆ–ไฝฟ็”จ `LiteLLMModel` ๅนถ่ฟ›่กŒ็›ธๅบ”้…็ฝฎใ€‚ + +ๅˆๅง‹ๅŒ– `AzureOpenAIServerModel` ๅฎžไพ‹ๆ—ถ๏ผŒ้œ€่ฆไผ ้€’ๆจกๅž‹้ƒจ็ฝฒๅ็งฐ๏ผŒๅฏ้€‰ๆ‹ฉไปฅไธ‹ไปปไธ€็งๆ–นๅผ๏ผš1.ไผ ้€’ `azure_endpoint`ใ€`api_key` ๅ’Œ `api_version` ๅ‚ๆ•ฐ๏ผ›2.่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ `AZURE_OPENAI_ENDPOINT`ใ€`AZURE_OPENAI_API_KEY` ๅ’Œ `OPENAI_API_VERSION` + +```python +# !pip install smolagents[openai] +from smolagents import CodeAgent, AzureOpenAIServerModel + +model = AzureOpenAIServerModel(model_id="gpt-4o-mini") +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` + +ไนŸๅฏๆŒ‰ๅฆ‚ไธ‹ๆ–นๅผ้…็ฝฎ `LiteLLMModel` ่ฟžๆŽฅ Azure OpenAI๏ผš + +- ๅฐ†ๆจกๅž‹้ƒจ็ฝฒๅ็งฐไฝœไธบ `model_id` ๅ‚ๆ•ฐไผ ้€’๏ผŒๅนถ็กฎไฟๅ…ถๅ‰็ผ€ไธบ `azure/` +- ็กฎไฟ่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ `AZURE_API_VERSION` +- ไปป้€‰ๅ…ถไธ€๏ผš1.ไผ ้€’ `api_base` ๅ’Œ `api_key` ๅ‚ๆ•ฐ๏ผ›2.่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ `AZURE_API_KEY` ๅ’Œ `AZURE_API_BASE` + +```python +import os +from smolagents import CodeAgent, LiteLLMModel + +AZURE_OPENAI_CHAT_DEPLOYMENT_NAME="gpt-35-turbo-16k-deployment" # example of deployment name + +os.environ["AZURE_API_KEY"] = "" # 
api_key +os.environ["AZURE_API_BASE"] = "" # "https://example-endpoint.openai.azure.com" +os.environ["AZURE_API_VERSION"] = "" # "2024-10-01-preview" + +model = LiteLLMModel(model_id="azure/" + AZURE_OPENAI_CHAT_DEPLOYMENT_NAME) +agent = CodeAgent(tools=[], model=model, add_base_tools=True) + +agent.run( + "Could you give me the 118th number in the Fibonacci sequence?", +) +``` + + + + +```python +# !pip install smolagents[mlx-lm] +from smolagents import CodeAgent, MLXModel + +mlx_model = MLXModel("mlx-community/Qwen2.5-Coder-32B-Instruct-4bit") +agent = CodeAgent(model=mlx_model, tools=[], add_base_tools=True) + +agent.run("Could you give me the 118th number in the Fibonacci sequence?") +``` + @@ -125,6 +168,7 @@ Python ่งฃ้‡Šๅ™จ้ป˜่ฎคไนŸไธๅ…่ฎธๅœจๅฎ‰ๅ…จๅˆ—่กจไน‹ๅค–ๅฏผๅ…ฅ๏ผŒๆ‰€ไปฅๆ‰€ๆœ‰ๆœ€ ```py from smolagents import CodeAgent +model = InferenceClientModel() agent = CodeAgent(tools=[], model=model, additional_authorized_imports=['requests', 'bs4']) agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") ``` @@ -134,7 +178,7 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co ๅฆ‚ๆžœ็”Ÿๆˆ็š„ไปฃ็ ๅฐ่ฏ•ๆ‰ง่กŒ้žๆณ•ๆ“ไฝœๆˆ–ๅ‡บ็Žฐๅธธ่ง„ Python ้”™่ฏฏ๏ผŒๆ‰ง่กŒๅฐ†ๅœๆญขใ€‚ -ๆ‚จไนŸๅฏไปฅไฝฟ็”จ [E2B ไปฃ็ ๆ‰ง่กŒๅ™จ](https://e2b.dev/docs#what-is-e2-b) ่€Œไธๆ˜ฏๆœฌๅœฐ Python ่งฃ้‡Šๅ™จ๏ผŒ้ฆ–ๅ…ˆ [่ฎพ็ฝฎ `E2B_API_KEY` ็Žฏๅขƒๅ˜้‡](https://e2b.dev/dashboard?tab=keys)๏ผŒ็„ถๅŽๅœจๅˆๅง‹ๅŒ– agent ๆ—ถไผ ้€’ `use_e2b_executor=True`ใ€‚ +ๆ‚จไนŸๅฏไปฅไฝฟ็”จ [E2B ไปฃ็ ๆ‰ง่กŒๅ™จ](https://e2b.dev/docs#what-is-e2-b) ๆˆ– Docker ่€Œไธๆ˜ฏๆœฌๅœฐ Python ่งฃ้‡Šๅ™จใ€‚ๅฏนไบŽ E2B๏ผŒ้ฆ–ๅ…ˆ [่ฎพ็ฝฎ `E2B_API_KEY` ็Žฏๅขƒๅ˜้‡](https://e2b.dev/dashboard?tab=keys)๏ผŒ็„ถๅŽๅœจๅˆๅง‹ๅŒ– agent ๆ—ถไผ ้€’ `executor_type="e2b"`ใ€‚ๅฏนไบŽ Docker๏ผŒๅœจๅˆๅง‹ๅŒ–ๆ—ถไผ ้€’ `executor_type="docker"`ใ€‚ > [!TIP] > ๅœจ [่ฏฅๆ•™็จ‹ไธญ](tutorials/secure_code_execution) ไบ†่งฃๆ›ดๅคšๅ…ณไบŽไปฃ็ ๆ‰ง่กŒ็š„ๅ†…ๅฎนใ€‚ @@ -168,7 +212,7 @@ agent.run("Could 
you get me the title of the page at url 'https://huggingface.co ### ้ป˜่ฎคๅทฅๅ…ท็ฎฑ -`smolagents` ้™„ๅธฆไบ†ไธ€ไธช็”จไบŽๅขžๅผบ agent ็š„้ป˜่ฎคๅทฅๅ…ท็ฎฑ๏ผŒๆ‚จๅฏไปฅๅœจๅˆๅง‹ๅŒ–ๆ—ถ้€š่ฟ‡ๅ‚ๆ•ฐ `add_base_tools = True` ๅฐ†ๅ…ถๆทปๅŠ ๅˆฐๆ‚จ็š„ agent ไธญ๏ผš +`smolagents` ้™„ๅธฆไบ†ไธ€ไธช็”จไบŽๅขžๅผบ agent ็š„้ป˜่ฎคๅทฅๅ…ท็ฎฑ๏ผŒๆ‚จๅฏไปฅๅœจๅˆๅง‹ๅŒ–ๆ—ถ้€š่ฟ‡ๅ‚ๆ•ฐ `add_base_tools=True` ๅฐ†ๅ…ถๆทปๅŠ ๅˆฐๆ‚จ็š„ agent ไธญ๏ผš - **DuckDuckGo ็ฝ‘้กตๆœ็ดข**๏ผšไฝฟ็”จ DuckDuckGo ๆต่งˆๅ™จๆ‰ง่กŒ็ฝ‘้กตๆœ็ดขใ€‚ - **Python ไปฃ็ ่งฃ้‡Šๅ™จ**๏ผšๅœจๅฎ‰ๅ…จ็Žฏๅขƒไธญ่ฟ่กŒ LLM ็”Ÿๆˆ็š„ Python ไปฃ็ ใ€‚ๅชๆœ‰ๅœจไฝฟ็”จ `add_base_tools=True` ๅˆๅง‹ๅŒ– [`ToolCallingAgent`] ๆ—ถๆ‰ไผšๆทปๅŠ ๆญคๅทฅๅ…ท๏ผŒๅ› ไธบๅŸบไบŽไปฃ็ ็š„ agent ๅทฒ็ปๅฏไปฅๅŽŸ็”Ÿๆ‰ง่กŒ Python ไปฃ็  @@ -260,8 +304,8 @@ class ModelDownloadTool(Tool): ็„ถๅŽๆ‚จๅฏไปฅ็›ดๆŽฅๅˆๅง‹ๅŒ–ๆ‚จ็š„ agent๏ผš ```py -from smolagents import CodeAgent, HfApiModel -agent = CodeAgent(tools=[model_download_tool], model=HfApiModel()) +from smolagents import CodeAgent, InferenceClientModel +agent = CodeAgent(tools=[model_download_tool], model=InferenceClientModel()) agent.run( "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" ) @@ -274,7 +318,7 @@ agent.run( โ”‚ Can you give me the name of the model that has the most downloads in the 'text-to-video' โ”‚ โ”‚ task on the Hugging Face Hub? 
โ”‚ โ”‚ โ”‚ -โ•ฐโ”€ HfApiModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ฐโ”€ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” Step 0 โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” โ•ญโ”€ Executing this code: โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ 1 model_name = model_download_tool(task="text-to-video") โ”‚ @@ -311,9 +355,9 @@ Out[20]: 'ByteDance/AnimateDiff-Lightning' ไปฅไธ‹ๆ˜ฏไธ€ไธชไฝฟ็”จๆˆ‘ไปฌ็š„ [`DuckDuckGoSearchTool`] ๅˆถไฝœไธ€ไธช็ฎก็†็‰นๅฎš็ฝ‘้กตๆœ็ดข agent ็š„ agent ็š„็คบไพ‹๏ผš ```py -from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, ManagedAgent +from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent -model = HfApiModel() +model = InferenceClientModel() web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) @@ -342,14 +386,14 @@ manager_agent.run("Who is the CEO of Hugging Face?") from smolagents import ( load_tool, CodeAgent, - HfApiModel, + InferenceClientModel, GradioUI ) # ไปŽ Hub ๅฏผๅ…ฅๅทฅๅ…ท image_generation_tool = load_tool("m-ric/text-to-image") -model = HfApiModel(model_id) +model = InferenceClientModel(model_id=model_id) # ไฝฟ็”จๅ›พๅƒ็”Ÿๆˆๅทฅๅ…ทๅˆๅง‹ๅŒ– agent agent = CodeAgent(tools=[image_generation_tool], model=model) @@ -364,6 +408,18 @@ GradioUI(agent).launch() ## ไธ‹ไธ€ๆญฅ +ๆœ€ๅŽ๏ผŒๅฝ“ๆ‚จๆŒ‰้œ€้…็ฝฎๅฅฝagentๅŽ๏ผŒๅณๅฏๅฐ†ๅ…ถๅˆ†ไบซ่‡ณ Hub๏ผ + 
+```py +agent.push_to_hub("m-ric/my_agent") +``` + +็ฑปไผผๅœฐ๏ผŒ่‹ฅ่ฆๅŠ ่ฝฝๅทฒๆŽจ้€่‡ณ Hub ็š„agent๏ผŒๅœจไฟกไปปๅ…ถๅทฅๅ…ทไปฃ็ ็š„ๅ‰ๆไธ‹๏ผŒๅฏไฝฟ็”จ๏ผš + +```py +agent.from_hub("m-ric/my_agent", trust_remote_code=True) +``` + ่ฆๆ›ดๆทฑๅ…ฅๅœฐไฝฟ็”จ๏ผŒๆ‚จๅฐ†้œ€่ฆๆŸฅ็œ‹ๆˆ‘ไปฌ็š„ๆ•™็จ‹๏ผš - [ๆˆ‘ไปฌ็š„ไปฃ็  agent ๅฆ‚ไฝ•ๅทฅไฝœ็š„่งฃ้‡Š](./tutorials/secure_code_execution) - [ๆœฌๆŒ‡ๅ—ๅ…ณไบŽๅฆ‚ไฝ•ๆž„ๅปบๅฅฝ็š„ agent](./tutorials/building_good_agents)ใ€‚ diff --git a/docs/source/zh/index.mdx b/docs/source/zh/index.mdx index d79e8090c..08260bb91 100644 --- a/docs/source/zh/index.mdx +++ b/docs/source/zh/index.mdx @@ -1,18 +1,3 @@ - - # `smolagents` ่ฟ™ๆ˜ฏๆž„ๅปบๅผบๅคง agent ็š„ๆœ€็ฎ€ๅ•ๆก†ๆžถ๏ผ้กบไพฟ้—ฎไธ€ไธ‹๏ผŒไป€ไนˆๆ˜ฏ "agent"๏ผŸๆˆ‘ไปฌๅœจ[ๆญค้กต้ข](conceptual_guides/intro_agents)ๆไพ›ไบ†ๆˆ‘ไปฌ็š„ๅฎšไน‰๏ผŒๆ‚จ่ฟ˜ๅฏไปฅๆ‰พๅˆฐๅ…ณไบŽไฝ•ๆ—ถไฝฟ็”จๆˆ–ไธไฝฟ็”จๅฎƒไปฌ็š„ๅปบ่ฎฎ๏ผˆๅ‰ง้€๏ผš้€šๅธธไธไฝฟ็”จ agent ไผšๆ›ดๅฅฝ๏ผ‰ใ€‚ diff --git a/docs/source/zh/reference/agents.mdx b/docs/source/zh/reference/agents.mdx index bd7f3a779..c4fae3c5c 100644 --- a/docs/source/zh/reference/agents.mdx +++ b/docs/source/zh/reference/agents.mdx @@ -1,19 +1,3 @@ - - # Agents๏ผˆๆ™บ่ƒฝไฝ“๏ผ‰ diff --git a/docs/source/zh/reference/models.mdx b/docs/source/zh/reference/models.mdx index 79c9e72a4..036334140 100644 --- a/docs/source/zh/reference/models.mdx +++ b/docs/source/zh/reference/models.mdx @@ -1,19 +1,3 @@ - - # ๆจกๅž‹ @@ -71,24 +55,24 @@ print(model([{"role": "user", "content": [{"type": "text", "text": "Ok!"}]}], st [[autodoc]] TransformersModel -### HfApiModel +### InferenceClientModel -`HfApiModel` ๅฐ่ฃ…ไบ† huggingface_hub ็š„ [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference)๏ผŒ็”จไบŽๆ‰ง่กŒ LLMใ€‚ๅฎƒๆ”ฏๆŒ HF ็š„ [Inference API](https://huggingface.co/docs/api-inference/index) ไปฅๅŠ Hub ไธŠๆ‰€ๆœ‰ๅฏ็”จ็š„[Inference Providers](https://huggingface.co/blog/inference-providers)ใ€‚ +`InferenceClientModel` ๅฐ่ฃ…ไบ† huggingface_hub ็š„ 
[InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference)๏ผŒ็”จไบŽๆ‰ง่กŒ LLMใ€‚ๅฎƒๆ”ฏๆŒ HF ็š„ [Inference API](https://huggingface.co/docs/api-inference/index) ไปฅๅŠ Hub ไธŠๆ‰€ๆœ‰ๅฏ็”จ็š„[Inference Providers](https://huggingface.co/blog/inference-providers)ใ€‚ ```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel messages = [ {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} ] -model = HfApiModel() +model = InferenceClientModel() print(model(messages)) ``` ```text >>> Of course! If you change your mind, feel free to reach out. Take care! ``` -[[autodoc]] HfApiModel +[[autodoc]] InferenceClientModel ### LiteLLMModel @@ -101,7 +85,7 @@ messages = [ {"role": "user", "content": [{"type": "text", "text": "Hello, how are you?"}]} ] -model = LiteLLMModel("anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) +model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest", temperature=0.2, max_tokens=10) print(model(messages)) ``` diff --git a/docs/source/zh/reference/tools.mdx b/docs/source/zh/reference/tools.mdx index 86f19dca4..9306eb322 100644 --- a/docs/source/zh/reference/tools.mdx +++ b/docs/source/zh/reference/tools.mdx @@ -1,19 +1,3 @@ - - # ๅทฅๅ…ท diff --git a/docs/source/zh/tutorials/building_good_agents.mdx b/docs/source/zh/tutorials/building_good_agents.mdx index fbf489fae..a70d251ce 100644 --- a/docs/source/zh/tutorials/building_good_agents.mdx +++ b/docs/source/zh/tutorials/building_good_agents.mdx @@ -1,18 +1,3 @@ - # ๆž„ๅปบๅฅฝ็”จ็š„ agent [[open-in-colab]] @@ -120,11 +105,11 @@ def get_weather_api(location: str, date_time: str) -> str: ้™คไบ†็ฎ€ๅ•็š„ไปปๅŠกๆ่ฟฐๅญ—็ฌฆไธฒๅค–๏ผŒไฝ ่ฟ˜ๅฏไปฅไฝฟ็”จ `additional_args` ๅ‚ๆ•ฐไผ ้€’ไปปไฝ•็ฑปๅž‹็š„ๅฏน่ฑก๏ผš ```py -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel model_id = "meta-llama/Llama-3.3-70B-Instruct" -agent = CodeAgent(tools=[], 
model=HfApiModel(model_id=model_id), add_base_tools=True) +agent = CodeAgent(tools=[], model=InferenceClientModel(model_id=model_id), add_base_tools=True) agent.run( "Why does Mike not know many people in New York?", @@ -209,13 +194,152 @@ In the end you have to return a final answer using the `final_answer` tool. Here are a few examples using notional tools: --- -{examples} +Task: "Generate an image of the oldest person in this document." -Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: +Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer. +Code: +```py +answer = document_qa(document=document, question="Who is the oldest person mentioned?") +print(answer) +``` +Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland." -{{tool_descriptions}} +Thought: I will now generate an image showcasing the oldest person. +Code: +```py +image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.") +final_answer(image) +``` + +--- +Task: "What is the result of the following operation: 5 + 3 + 1294.678?" -{{managed_agents_descriptions}} +Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool +Code: +```py +result = 5 + 3 + 1294.678 +final_answer(result) +``` + +--- +Task: +"Answer the question in the variable `question` about the image stored in the variable `image`. The question is in French. 
+You have been provided with these additional arguments, that you can access using the keys as variables in your python code: +{'question': 'Quel est l'animal sur l'image?', 'image': 'path/to/image.jpg'}" + +Thought: I will use the following tools: `translator` to translate the question into English and then `image_qa` to answer the question on the input image. +Code: +```py +translated_question = translator(question=question, src_lang="French", tgt_lang="English") +print(f"The translated question is {translated_question}.") +answer = image_qa(image=image, question=translated_question) +final_answer(f"The answer is {answer}") +``` + +--- +Task: +In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer. +What does he say was the consequence of Einstein learning too much math on his creativity, in one word? + +Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein") +print(pages) +``` +Observation: +No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein". + +Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query. +Code: +```py +pages = search(query="1979 interview Stanislaus Ulam") +print(pages) +``` +Observation: +Found 6 pages: +[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/) + +[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/) + +(truncated) + +Thought: I will read the first 2 pages to know more. 
+Code: +```py +for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]: + whole_page = visit_webpage(url) + print(whole_page) + print("\n" + "="*80 + "\n") # Print separator between pages +``` +Observation: +Manhattan Project Locations: +Los Alamos, NM +Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at +(truncated) + +Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word. +Code: +```py +final_answer("diminished") +``` + +--- +Task: "Which city has the highest population: Guangzhou or Shanghai?" + +Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities. +Code: +```py +for city in ["Guangzhou", "Shanghai"]: + print(f"Population {city}:", search(f"{city} population") +``` +Observation: +Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.'] +Population Shanghai: '26 million (2019)' + +Thought: Now I know that Shanghai has the highest population. +Code: +```py +final_answer("Shanghai") +``` + +--- +Task: "What is the current age of the pope, raised to the power 0.36?" + +Thought: I will use the tool `wiki` to get the age of the pope, and confirm that with a web search. +Code: +```py +pope_age_wiki = wiki(query="current pope age") +print("Pope age as per wikipedia:", pope_age_wiki) +pope_age_search = web_search(query="current pope age") +print("Pope age as per google search:", pope_age_search) +``` +Observation: +Pope age: "The pope Francis is currently 88 years old." 
+ +Thought: I know that the pope is 88 years old. Let's compute the result using python code. +Code: +```py +pope_current_age = 88 ** 0.36 +final_answer(pope_current_age) +``` + +Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: +{%- for tool in tools.values() %} +- {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} +{%- endfor %} + +{%- if managed_agents and managed_agents.values() | list %} +You can also give tasks to team members. +Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. +Given that this team member is a real human, you should be very verbose in your task. +Here is a list of the team members that you can call: +{%- for agent in managed_agents.values() %} +- {{ agent.name }}: {{ agent.description }} +{%- endfor %} +{%- endif %} Here are the rules you should always follow to solve your task: 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```' sequence, else you will fail. @@ -224,7 +348,7 @@ Here are the rules you should always follow to solve your task: 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. -7. 
Never create any notional variables in our code, as having these in your logs might derail you from the true variables. +7. Never create any notional variables in our code, as having these in your logs will derail you from the true variables. 8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}} 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. 10. Don't give up! You're in charge of solving the task, not providing directions to solve it. @@ -232,11 +356,29 @@ Here are the rules you should always follow to solve your task: Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. ``` -ๅฆ‚ไฝ ๆ‰€่ง๏ผŒๆœ‰ไธ€ไบ›ๅ ไฝ็ฌฆ๏ผŒๅฆ‚ `"{{tool_descriptions}}"`๏ผš่ฟ™ไบ›ๅฐ†ๅœจ agent ๅˆๅง‹ๅŒ–ๆ—ถ็”จไบŽๆ’ๅ…ฅๆŸไบ›่‡ชๅŠจ็”Ÿๆˆ็š„ๅทฅๅ…ทๆˆ–็ฎก็† agent ็š„ๆ่ฟฐใ€‚ +ๅฆ‚ไฝ ๆ‰€่ง๏ผŒๆœ‰ไธ€ไบ›ๅ ไฝ็ฌฆ๏ผŒๅฆ‚ `"{{ tool.description }}"`๏ผš่ฟ™ไบ›ๅฐ†ๅœจ agent ๅˆๅง‹ๅŒ–ๆ—ถ็”จไบŽๆ’ๅ…ฅๆŸไบ›่‡ชๅŠจ็”Ÿๆˆ็š„ๅทฅๅ…ทๆˆ–็ฎก็† agent ็š„ๆ่ฟฐใ€‚ ๅ› ๆญค๏ผŒ่™ฝ็„ถไฝ ๅฏไปฅ้€š่ฟ‡ๅฐ†่‡ชๅฎšไน‰ๆ็คบไฝœไธบๅ‚ๆ•ฐไผ ้€’็ป™ `system_prompt` ๅ‚ๆ•ฐๆฅ่ฆ†็›–ๆญค็ณป็ปŸๆ็คบๆจกๆฟ๏ผŒไฝ†ไฝ ็š„ๆ–ฐ็ณป็ปŸๆ็คบๅฟ…้กปๅŒ…ๅซไปฅไธ‹ๅ ไฝ็ฌฆ๏ผš -- `"{{tool_descriptions}}"` ็”จไบŽๆ’ๅ…ฅๅทฅๅ…ทๆ่ฟฐใ€‚ -- `"{{managed_agents_description}}"` ็”จไบŽๆ’ๅ…ฅ managed agent ็š„ๆ่ฟฐ๏ผˆๅฆ‚ๆžœๆœ‰๏ผ‰ใ€‚ +- ็”จไบŽๆ’ๅ…ฅๅทฅๅ…ทๆ่ฟฐใ€‚ + ``` + {%- for tool in tools.values() %} + - {{ tool.name }}: {{ tool.description }} + Takes inputs: {{tool.inputs}} + Returns an output of type: {{tool.output_type}} + {%- endfor %} + ``` +- ็”จไบŽๆ’ๅ…ฅ managed agent ็š„ๆ่ฟฐ๏ผˆๅฆ‚ๆžœๆœ‰๏ผ‰ใ€‚ + ``` + {%- if managed_agents and managed_agents.values() | list %} + You can also give tasks to team members. + Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. 
+ Given that this team member is a real human, you should be very verbose in your task. + Here is a list of the team members that you can call: + {%- for agent in managed_agents.values() %} + - {{ agent.name }}: {{ agent.description }} + {%- endfor %} + {%- endif %} + ``` - ไป…้™ `CodeAgent`๏ผš`"{{authorized_imports}}"` ็”จไบŽๆ’ๅ…ฅๆŽˆๆƒๅฏผๅ…ฅๅˆ—่กจใ€‚ ็„ถๅŽไฝ ๅฏไปฅๆ นๆฎๅฆ‚ไธ‹๏ผŒๆ›ดๆ”น็ณป็ปŸๆ็คบ๏ผš @@ -253,7 +395,7 @@ agent.prompt_templates["system_prompt"] = agent.prompt_templates["system_prompt" ๆˆ‘ไปฌๆไพ›ไบ†ไธ€ไธช็”จไบŽ่กฅๅ……่ง„ๅˆ’ๆญฅ้ชค็š„ๆจกๅž‹๏ผŒagent ๅฏไปฅๅœจๆญฃๅธธๆ“ไฝœๆญฅ้ชคไน‹้—ดๅฎšๆœŸ่ฟ่กŒใ€‚ๅœจๆญคๆญฅ้ชคไธญ๏ผŒๆฒกๆœ‰ๅทฅๅ…ท่ฐƒ็”จ๏ผŒLLM ๅชๆ˜ฏ่ขซ่ฆๆฑ‚ๆ›ดๆ–ฐๅฎƒ็Ÿฅ้“็š„ไบ‹ๅฎžๅˆ—่กจ๏ผŒๅนถๆ นๆฎ่ฟ™ไบ›ไบ‹ๅฎžๅๆŽจๅฎƒๅบ”่ฏฅ้‡‡ๅ–็š„ไธ‹ไธ€ๆญฅใ€‚ ```py -from smolagents import load_tool, CodeAgent, HfApiModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool from dotenv import load_dotenv load_dotenv() @@ -265,7 +407,7 @@ search_tool = DuckDuckGoSearchTool() agent = CodeAgent( tools=[search_tool], - model=HfApiModel("Qwen/Qwen2.5-72B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-72B-Instruct"), planning_interval=3 # ่ฟ™ๆ˜ฏไฝ ๆฟ€ๆดป่ง„ๅˆ’็š„ๅœฐๆ–น๏ผ ) diff --git a/docs/source/zh/tutorials/inspect_runs.mdx b/docs/source/zh/tutorials/inspect_runs.mdx new file mode 100644 index 000000000..ea3eb659b --- /dev/null +++ b/docs/source/zh/tutorials/inspect_runs.mdx @@ -0,0 +1,180 @@ +# ไฝฟ็”จ OpenTelemetry ๆฃ€ๆŸฅ่ฟ่กŒ่ฎฐๅฝ• + +[[open-in-colab]] + +> [!TIP] +> ๅฆ‚ๆžœๆ‚จๆ˜ฏๅˆๆฌกๆž„ๅปบAgent๏ผŒๅปบ่ฎฎๅ…ˆ้˜…่ฏป [Agent ๅ…ฅ้—จๆŒ‡ๅ—](../conceptual_guides/intro_agents) ๅ’Œ [smolagents ๅฏผ่งˆ](../guided_tour)ใ€‚ + +## ไธบไป€ไนˆ้œ€่ฆ่ฎฐๅฝ•Agent่ฟ่กŒ๏ผŸ + +่ฐƒ่ฏ•Agent่ฟ่กŒ่ฟ‡็จ‹ๅ…ทๆœ‰ๆŒ‘ๆˆ˜ๆ€งใ€‚ + +้ชŒ่ฏ่ฟ่กŒๆ˜ฏๅฆๆญฃๅธธ่ฟ›่กŒๅพˆๅ›ฐ้šพ๏ผŒๅ› ไธบAgent็š„ๅทฅไฝœๆต็จ‹ๆœฌ่บซๅ…ทๆœ‰ [่ฎพ่ฎกไธŠ็š„ไธๅฏ้ข„ๆต‹ๆ€ง](../conceptual_guides/intro_agents)๏ผˆๅฆ‚ๆžœๅฏ้ข„ๆต‹๏ผŒ็›ดๆŽฅไฝฟ็”จไผ ็ปŸไปฃ็ 
ๅณๅฏ๏ผ‰ใ€‚ + +ๆฃ€ๆŸฅ่ฟ่กŒ่ฎฐๅฝ•ๅŒๆ ทๅ›ฐ้šพ๏ผšๅคšๆญฅ้ชค็š„Agentๅพ€ๅพ€ไผšๅฟซ้€ŸๅœจๆŽงๅˆถๅฐ็”Ÿๆˆๅคง้‡ๆ—ฅๅฟ—๏ผŒ่€Œๅคงๅคšๆ•ฐ้”™่ฏฏๅชๆ˜ฏ"LLM ไฝŽ็บง้”™่ฏฏ"็ฑปๅž‹็š„้—ฎ้ข˜๏ผŒ้€šๅธธLLMไผšๅœจๅŽ็ปญๆญฅ้ชคไธญ้€š่ฟ‡็”Ÿๆˆๆ›ดๅฅฝ็š„ไปฃ็ ๆˆ–ๅทฅๅ…ท่ฐƒ็”จๆฅ่‡ชๆˆ‘ไฟฎๆญฃใ€‚ + +ๅ› ๆญค๏ผŒๅœจ็”Ÿไบง็Žฏๅขƒไธญไฝฟ็”จ็›‘ๆŽงๅทฅๅ…ท่ฎฐๅฝ•Agent่ฟ่กŒ่ฟ‡็จ‹๏ผŒๅฏนไบŽๅŽ็ปญๆฃ€ๆŸฅๅ’Œๅˆ†ๆž่‡ณๅ…ณ้‡่ฆ๏ผ + +ๆˆ‘ไปฌ้‡‡็”จ [OpenTelemetry](https://opentelemetry.io/) ๆ ‡ๅ‡†ๆฅๅฎž็ŽฐAgent่ฟ่กŒ็›‘ๆŽงใ€‚ + +่ฟ™ๆ„ๅ‘ณ็€ๆ‚จๅช้œ€ๆทปๅŠ ๅฐ‘้‡็›‘ๆŽงไปฃ็ ๏ผŒๅณๅฏๅœจๆญฃๅธธ่ฟ่กŒAgentๆ—ถ่‡ชๅŠจ่ฎฐๅฝ•ๆ‰€ๆœ‰ไฟกๆฏๅˆฐ็›‘ๆŽงๅนณๅฐใ€‚ไปฅไธ‹ๆ˜ฏๅœจไธๅŒOpenTelemetryๅŽ็ซฏๅฎž็ŽฐๆญคๅŠŸ่ƒฝ็š„็คบไพ‹๏ผš + +ๅœจ็›‘ๆŽงๅนณๅฐไธŠ็š„ๅฑ•็คบๆ•ˆๆžœๅฆ‚ไธ‹๏ผš + +
+ +
+ + +## ไฝฟ็”จ Arize AI Phoenix ้…็ฝฎ้ฅๆต‹ + +้ฆ–ๅ…ˆๅฎ‰่ฃ…ๅฟ…่ฆ็š„่ฝฏไปถๅŒ…ใ€‚่ฟ™้‡Œๆˆ‘ไปฌ้€‰ๆ‹ฉๅฎ‰่ฃ… [Arize AI ็š„ Phoenix](https://github.com/Arize-ai/phoenix) ไฝœไธบๆ—ฅๅฟ—ๆ”ถ้›†ๅ’Œๆฃ€ๆŸฅๆ–นๆกˆ๏ผŒๆ‚จไนŸๅฏไปฅไฝฟ็”จๅ…ถไป–ๅ…ผๅฎน OpenTelemetry ็š„ๅนณๅฐๆฅๅฎŒๆˆๆ”ถ้›†ไธŽๆฃ€ๆŸฅๅทฅไฝœใ€‚ + +```shell +pip install 'smolagents[telemetry]' +``` + +ๆŽฅ็€ๅœจๅŽๅฐ่ฟ่กŒๆ—ฅๅฟ—ๆ”ถ้›†ๅ™จ๏ผš + +```shell +python -m phoenix.server.main serve +``` + +ๆœ€ๅŽ้…็ฝฎ `SmolagentsInstrumentor` ๆฅ่ฟฝ่ธชAgentๆดปๅŠจ๏ผŒๅนถๅฐ†่ฟฝ่ธชๆ•ฐๆฎๅ‘้€่‡ณ Phoenix ้ป˜่ฎค็ซฏ็‚น๏ผš + +```python +from phoenix.otel import register +from openinference.instrumentation.smolagents import SmolagentsInstrumentor + +register() +SmolagentsInstrumentor().instrument() +``` + +ๅฎŒๆˆไธŠ่ฟฐ้…็ฝฎๅŽ๏ผŒๅณๅฏๆญฃๅธธ่ฟ่กŒๆ‚จ็š„Agent๏ผ + +```py +from smolagents import ( + CodeAgent, + ToolCallingAgent, + DuckDuckGoSearchTool, + VisitWebpageTool, + InferenceClientModel, +) + +model = InferenceClientModel() + +search_agent = ToolCallingAgent( + tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + model=model, + name="search_agent", + description="This is an agent that can do web search.", +) + +manager_agent = CodeAgent( + tools=[], + model=model, + managed_agents=[search_agent], +) +manager_agent.run( + "If the US keeps its 2024 growth rate, how many years will it take for the GDP to double?" +) +``` +Voilร ! 
+ +ๆญคๆ—ถ่ฎฟ้—ฎ `http://0.0.0.0:6006/projects/` ๅณๅฏๆŸฅ็œ‹่ฟ่กŒ่ฎฐๅฝ•๏ผš + + + +ๅฆ‚ๅ›พๆ‰€็คบ๏ผŒCodeAgent ่ฐƒ็”จไบ†ๅ…ถๆ‰˜็ฎก็š„ ToolCallingAgent๏ผˆๆณจ๏ผšๆ‰˜็ฎกAgentไนŸๅฏไปฅๆ˜ฏๅฆไธ€ไธช CodeAgent๏ผ‰ๆ‰ง่กŒ็พŽๅ›ฝ2024ๅนด็ปๆตŽๅขž้•ฟ็އ็š„็ฝ‘็ปœๆœ็ดขใ€‚ๆ‰˜็ฎกAgent่ฟ”ๅ›žๆŠฅๅ‘ŠๅŽ๏ผŒ็ฎก็†Agentๆ นๆฎ็ป“ๆžœ่ฎก็ฎ—ๅ‡บ็ปๆตŽ็ฟปๅ€ๅ‘จๆœŸ๏ผๆ˜ฏไธๆ˜ฏๅพˆๆ™บ่ƒฝ๏ผŸ + +## ไฝฟ็”จ Langfuse ้…็ฝฎ้ฅๆต‹ + +ๆœฌ้ƒจๅˆ†ๆผ”็คบๅฆ‚ไฝ•้€š่ฟ‡ `SmolagentsInstrumentor` ไฝฟ็”จ **Langfuse** ็›‘ๆŽงๅ’Œ่ฐƒ่ฏ• Hugging Face **smolagents**ใ€‚ + +> **Langfuse ๆ˜ฏไป€ไนˆ๏ผŸ** [Langfuse](https://langfuse.com) ๆ˜ฏ้ขๅ‘LLMๅทฅ็จ‹็š„ๅผ€ๆบๅนณๅฐ๏ผŒๆไพ›AI Agent็š„่ฟฝ่ธชไธŽ็›‘ๆŽงๅŠŸ่ƒฝ๏ผŒๅธฎๅŠฉๅผ€ๅ‘่€…่ฐƒ่ฏ•ใ€ๅˆ†ๆžๅ’Œไผ˜ๅŒ–ไบงๅ“ใ€‚่ฏฅๅนณๅฐ้€š่ฟ‡ๅŽŸ็”Ÿ้›†ๆˆใ€OpenTelemetry ๅ’Œ SDKs ไธŽๅ„็ฑปๅทฅๅ…ทๆก†ๆžถๅฏนๆŽฅใ€‚ + +### ๆญฅ้ชค 1: ๅฎ‰่ฃ…ไพ่ต– + +```python +%pip install smolagents +%pip install opentelemetry-sdk opentelemetry-exporter-otlp openinference-instrumentation-smolagents +``` + +### ๆญฅ้ชค 2: ้…็ฝฎ็Žฏๅขƒๅ˜้‡ + +่ฎพ็ฝฎ Langfuse API ๅฏ†้’ฅ๏ผŒๅนถ้…็ฝฎ OpenTelemetry ็ซฏ็‚นๅฐ†่ฟฝ่ธชๆ•ฐๆฎๅ‘้€่‡ณ Langfuseใ€‚้€š่ฟ‡ๆณจๅ†Œ [Langfuse Cloud](https://cloud.langfuse.com) ๆˆ– [่‡ชๆ‰˜็ฎก Langfuse](https://langfuse.com/self-hosting) ่Žทๅ– API ๅฏ†้’ฅใ€‚ + +ๅŒๆ—ถ้œ€ๆทปๅŠ  [Hugging Face ไปค็‰Œ](https://huggingface.co/settings/tokens) (`HF_TOKEN`) ไฝœไธบ็Žฏๅขƒๅ˜้‡๏ผš +```python +import os +import base64 + +LANGFUSE_PUBLIC_KEY="pk-lf-..." +LANGFUSE_SECRET_KEY="sk-lf-..." +LANGFUSE_AUTH=base64.b64encode(f"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}".encode()).decode() + +os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://cloud.langfuse.com/api/public/otel" # EU data region +# os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://us.cloud.langfuse.com/api/public/otel" # US data region +os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}" + +# your Hugging Face token +os.environ["HF_TOKEN"] = "hf_..." 
+``` + +### ๆญฅ้ชค 3: ๅˆๅง‹ๅŒ– `SmolagentsInstrumentor` + +ๅœจๅบ”็”จ็จ‹ๅบไปฃ็ ๆ‰ง่กŒๅ‰ๅˆๅง‹ๅŒ– `SmolagentsInstrumentor`ใ€‚้…็ฝฎ `tracer_provider` ๅนถๆทปๅŠ  span processor ๅฐ†่ฟฝ่ธชๆ•ฐๆฎๅฏผๅ‡บ่‡ณ Langfuseใ€‚`OTLPSpanExporter()` ไผš่‡ชๅŠจไฝฟ็”จ็Žฏๅขƒๅ˜้‡ไธญ้…็ฝฎ็š„็ซฏ็‚นๅ’Œ่ฏทๆฑ‚ๅคดใ€‚ + + +```python +from opentelemetry.sdk.trace import TracerProvider + +from openinference.instrumentation.smolagents import SmolagentsInstrumentor +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +trace_provider = TracerProvider() +trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter())) + +SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) +``` + +### ๆญฅ้ชค 4: ่ฟ่กŒ smolagent + +```python +from smolagents import ( + CodeAgent, + ToolCallingAgent, + DuckDuckGoSearchTool, + VisitWebpageTool, + InferenceClientModel, +) + +model = InferenceClientModel( + model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" +) + +search_agent = ToolCallingAgent( + tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + model=model, + name="search_agent", + description="This is an agent that can do web search.", +) + +manager_agent = CodeAgent( + tools=[], + model=model, + managed_agents=[search_agent], +) +manager_agent.run( + "How can Langfuse be used to monitor and improve the reasoning and decision-making of smolagents when they execute multi-step tasks, like dynamically adjusting a recipe based on user feedback or available ingredients?" 
+) +``` + +### ๆญฅ้ชค 5: ๅœจ Langfuse ไธญๆŸฅ็œ‹่ฟฝ่ธช่ฎฐๅฝ• + +่ฟ่กŒAgentๅŽ๏ผŒๆ‚จๅฏไปฅๅœจ [Langfuse](https://cloud.langfuse.com) ๅนณๅฐๆŸฅ็œ‹ smolagents ๅบ”็”จ็”Ÿๆˆ็š„่ฟฝ่ธช่ฎฐๅฝ•ใ€‚่ฟ™ไบ›่ฎฐๅฝ•ไผš่ฏฆ็ป†ๅฑ•็คบLLM็š„ไบคไบ’ๆญฅ้ชค๏ผŒๅธฎๅŠฉๆ‚จ่ฐƒ่ฏ•ๅ’Œไผ˜ๅŒ–AIไปฃ็†ใ€‚ + +![smolagents ่ฟฝ่ธช็คบไพ‹](https://langfuse.com/images/cookbook/integration-smolagents/smolagent_example_trace.png) + +_[Langfuse ๅ…ฌๅผ€็คบไพ‹่ฟฝ่ธช](https://cloud.langfuse.com/project/cloramnkj0002jz088vzn1ja4/traces/ce5160f9bfd5a6cd63b07d2bfcec6f54?timestamp=2025-02-11T09%3A25%3A45.163Z&display=details)_ \ No newline at end of file diff --git a/docs/source/zh/tutorials/memory.mdx b/docs/source/zh/tutorials/memory.mdx new file mode 100644 index 000000000..de2bdc8c3 --- /dev/null +++ b/docs/source/zh/tutorials/memory.mdx @@ -0,0 +1,131 @@ +# ๐Ÿ“š ็ฎก็†Agent็š„่ฎฐๅฟ† + +[[open-in-colab]] + +ๅฝ’ๆ น็ป“ๅบ•๏ผŒAgentๅฏไปฅๅฎšไน‰ไธบ็”ฑๅ‡ ไธช็ฎ€ๅ•็ป„ไปถๆž„ๆˆ๏ผšๅฎƒๆ‹ฅๆœ‰ๅทฅๅ…ทใ€ๆ็คบ่ฏใ€‚ๆœ€้‡่ฆ็š„ๆ˜ฏ๏ผŒๅฎƒๅ…ทๅค‡ๅฏน่ฟ‡ๅพ€ๆญฅ้ชค็š„่ฎฐๅฟ†๏ผŒ่ƒฝๅคŸ่ฟฝๆบฏๅฎŒๆ•ด็š„่ง„ๅˆ’ใ€ๆ‰ง่กŒๅ’Œ้”™่ฏฏๅކๅฒใ€‚ + +### ๅ›žๆ”พAgent็š„่ฎฐๅฟ† + +ๆˆ‘ไปฌๆไพ›ไบ†ๅคš้กนๅŠŸ่ƒฝๆฅๅฎกๆŸฅAgent็š„่ฟ‡ๅพ€่ฟ่กŒ่ฎฐๅฝ•ใ€‚ + +ๆ‚จๅฏไปฅ้€š่ฟ‡ๆ’่ฃ…๏ผˆinstrumentation๏ผ‰ๅœจๅฏ่ง†ๅŒ–็•Œ้ขไธญๆŸฅ็œ‹Agent็š„่ฟ่กŒ่ฟ‡็จ‹๏ผŒ่ฏฅ็•Œ้ขๆ”ฏๆŒๅฏน็‰นๅฎšๆญฅ้ชค่ฟ›่กŒ็ผฉๆ”พๆ“ไฝœ๏ผŒๅ…ทไฝ“ๆ–นๆณ•ๅ‚่ง[ๆ’่ฃ…ๆŒ‡ๅ—](./inspect_runs)ใ€‚ + +ๆ‚จไนŸๅฏไปฅไฝฟ็”จ`agent.replay()`ๆ–นๆณ•ๅฎž็Žฐๅ›žๆ”พ๏ผš + +ๅฝ“AgentๅฎŒๆˆ่ฟ่กŒๅŽ๏ผš +```py +from smolagents import InferenceClientModel, CodeAgent + +agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=0) + +result = agent.run("What's the 20th Fibonacci number?") +``` + +่‹ฅ่ฆๅ›žๆ”พๆœ€่ฟ‘ไธ€ๆฌก่ฟ่กŒ๏ผŒๅช้œ€ไฝฟ็”จ๏ผš +```py +agent.replay() +``` + +### ๅŠจๆ€ไฟฎๆ”นAgent็š„่ฎฐๅฟ† + +่ฎธๅคš้ซ˜็บงๅบ”็”จๅœบๆ™ฏ้œ€่ฆๅฏนAgent็š„่ฎฐๅฟ†่ฟ›่กŒๅŠจๆ€ไฟฎๆ”นใ€‚ + +ๆ‚จๅฏไปฅ้€š่ฟ‡ไปฅไธ‹ๆ–นๅผ่ฎฟ้—ฎAgent็š„่ฎฐๅฟ†๏ผš + +```py +from smolagents import ActionStep + +system_prompt_step = agent.memory.system_prompt 
+print("The system prompt given to the agent was:") +print(system_prompt_step.system_prompt) + +task_step = agent.memory.steps[0] +print("\n\nThe first task step was:") +print(task_step.task) + +for step in agent.memory.steps: + if isinstance(step, ActionStep): + if step.error is not None: + print(f"\nStep {step.step_number} got this error:\n{step.error}\n") + else: + print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n") +``` + +ไฝฟ็”จ`agent.memory.get_full_steps()`ๅฏ่Žทๅ–ๅฎŒๆ•ดๆญฅ้ชคๅญ—ๅ…ธๆ•ฐๆฎใ€‚ + +ๆ‚จ่ฟ˜ๅฏไปฅ้€š่ฟ‡ๆญฅ้ชคๅ›ž่ฐƒ๏ผˆstep callbacks๏ผ‰ๅฎž็Žฐ่ฎฐๅฟ†็š„ๅŠจๆ€ไฟฎๆ”นใ€‚ + +ๆญฅ้ชคๅ›ž่ฐƒๅ‡ฝๆ•ฐๅฏ้€š่ฟ‡ๅ‚ๆ•ฐ็›ดๆŽฅ่ฎฟ้—ฎ`agent`ๅฏน่ฑก๏ผŒๅ› ๆญค่ƒฝๅคŸ่ฎฟ้—ฎๆ‰€ๆœ‰่ฎฐๅฟ†ๆญฅ้ชคๅนถๆ นๆฎ้œ€่ฆ่ฟ›่กŒไฟฎๆ”นใ€‚ไพ‹ๅฆ‚๏ผŒๅ‡่ฎพๆ‚จๆญฃๅœจ็›‘ๆŽง็ฝ‘้กตๆต่งˆAgentๆฏไธชๆญฅ้ชค็š„ๅฑๅน•ๆˆชๅ›พ๏ผŒๅธŒๆœ›ไฟ็•™ๆœ€ๆ–ฐๆˆชๅ›พๅŒๆ—ถๅˆ ้™คๆ—งๆญฅ้ชค็š„ๅ›พ็‰‡ไปฅ่Š‚็œtokenๆถˆ่€—ใ€‚ + +ๅฏๅ‚่€ƒไปฅไธ‹ไปฃ็ ็คบไพ‹๏ผš +_ๆณจ๏ผšๆญคไปฃ็ ็‰‡ๆฎตไธๅฎŒๆ•ด๏ผŒ้ƒจๅˆ†ๅฏผๅ…ฅ่ฏญๅฅๅ’Œๅฏน่ฑกๅฎšไน‰ๅทฒ็ฒพ็ฎ€๏ผŒๅฎŒๆ•ดไปฃ็ ่ฏท่ฎฟ้—ฎ[ๅŽŸๅง‹่„šๆœฌ](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py)_ + +```py +import helium +from PIL import Image +from io import BytesIO +from time import sleep + +def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: + sleep(1.0) # Let JavaScript animations happen before taking the screenshot + driver = helium.get_driver() + latest_step = memory_step.step_number + for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing + if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2: + previous_memory_step.observations_images = None + png_bytes = driver.get_screenshot_as_png() + image = Image.open(BytesIO(png_bytes)) + memory_step.observations_images = [image.copy()] +``` + +ๆœ€ๅŽๅœจๅˆๅง‹ๅŒ–Agentๆ—ถ๏ผŒๅฐ†ๆญคๅ‡ฝๆ•ฐไผ ๅ…ฅ`step_callbacks`ๅ‚ๆ•ฐ๏ผš + +```py +CodeAgent( + 
tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], + model=model, + additional_authorized_imports=["helium"], + step_callbacks=[update_screenshot], + max_steps=20, + verbosity_level=2, +) +``` + +่ฏท่ฎฟ้—ฎๆˆ‘ไปฌ็š„ [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) ๆŸฅ็œ‹ๅฎŒๆ•ดๅฏ่ฟ่กŒ็คบไพ‹ใ€‚ + +### ๅˆ†ๆญฅ่ฟ่กŒ Agents + +ๅฝ“ๆ‚จ้œ€่ฆๅค„็†่€—ๆ—ถๆ•ฐๅคฉ็š„ๅทฅๅ…ท่ฐƒ็”จๆ—ถ๏ผŒ่ฟ™็งๆ–นๅผ็‰นๅˆซๆœ‰็”จ๏ผšๆ‚จๅฏไปฅ้€ๆญฅๆ‰ง่กŒAgentsใ€‚่ฟ™่ฟ˜ๅ…่ฎธๆ‚จๅœจๆฏไธ€ๆญฅๆ›ดๆ–ฐ่ฎฐๅฟ†ใ€‚ + +```py +from smolagents import InferenceClientModel, CodeAgent, ActionStep, TaskStep + +agent = CodeAgent(tools=[], model=InferenceClientModel(), verbosity_level=1) +print(agent.memory.system_prompt) + +task = "What is the 20th Fibonacci number?" + +# You could modify the memory as needed here by inputting the memory of another agent. +# agent.memory.steps = previous_agent.memory.steps + +# Let's start a new task! +agent.memory.steps.append(TaskStep(task=task, task_images=[])) + +final_answer = None +step_number = 1 +while final_answer is None and step_number <= 10: + memory_step = ActionStep( + step_number=step_number, + observations_images=[], + ) + # Run one step. + final_answer = agent.step(memory_step) + agent.memory.steps.append(memory_step) + step_number += 1 + + # Change the memory as you please! + # For instance to update the latest step: + # agent.memory.steps[-1] = ... + +print("The final answer is:", final_answer) +``` \ No newline at end of file diff --git a/docs/source/zh/tutorials/secure_code_execution.mdx b/docs/source/zh/tutorials/secure_code_execution.mdx index 6017aefb9..93e80986a 100644 --- a/docs/source/zh/tutorials/secure_code_execution.mdx +++ b/docs/source/zh/tutorials/secure_code_execution.mdx @@ -1,18 +1,3 @@ - # ๅฎ‰ๅ…จไปฃ็ ๆ‰ง่กŒ [[open-in-colab]] @@ -41,7 +26,7 @@ rendered properly in your Markdown viewer. 
### ๆœฌๅœฐ Python ่งฃ้‡Šๅ™จ ้ป˜่ฎคๆƒ…ๅ†ตไธ‹๏ผŒ`CodeAgent` ไผšๅœจไฝ ็š„็Žฏๅขƒไธญ่ฟ่กŒ LLM ็”Ÿๆˆ็š„ไปฃ็ ใ€‚ -่ฟ™ไธชๆ‰ง่กŒไธๆ˜ฏ็”ฑๆ™ฎ้€š็š„ Python ่งฃ้‡Šๅ™จๅฎŒๆˆ็š„๏ผšๆˆ‘ไปฌไปŽ้›ถๅผ€ๅง‹้‡ๆ–ฐๆž„ๅปบไบ†ไธ€ไธชๆ›ดๅฎ‰ๅ…จ็š„ `LocalPythonInterpreter`ใ€‚ +่ฟ™ไธชๆ‰ง่กŒไธๆ˜ฏ็”ฑๆ™ฎ้€š็š„ Python ่งฃ้‡Šๅ™จๅฎŒๆˆ็š„๏ผšๆˆ‘ไปฌไปŽ้›ถๅผ€ๅง‹้‡ๆ–ฐๆž„ๅปบไบ†ไธ€ไธชๆ›ดๅฎ‰ๅ…จ็š„ `LocalPythonExecutor`ใ€‚ ่ฟ™ไธช่งฃ้‡Šๅ™จ้€š่ฟ‡ไปฅไธ‹ๆ–นๅผ่ฎพ่ฎกไปฅ็กฎไฟๅฎ‰ๅ…จ๏ผš - ๅฐ†ๅฏผๅ…ฅ้™ๅˆถไธบ็”จๆˆทๆ˜พๅผไผ ้€’็š„ๅˆ—่กจ - ้™ๅˆถๆ“ไฝœๆฌกๆ•ฐไปฅ้˜ฒๆญขๆ— ้™ๅพช็Žฏๅ’Œ่ต„ๆบ่†จ่ƒ€ @@ -64,16 +49,16 @@ rendered properly in your Markdown viewer. ็Žฐๅœจไฝ ๅทฒ็ปๅ‡†ๅค‡ๅฅฝไบ†๏ผ -่ฆๅฐ†ไปฃ็ ๆ‰ง่กŒๅ™จ่ฎพ็ฝฎไธบ E2B๏ผŒๅช้œ€ๅœจๅˆๅง‹ๅŒ– `CodeAgent` ๆ—ถไผ ้€’ๆ ‡ๅฟ— `use_e2b_executor=True`ใ€‚ +่ฆๅฐ†ไปฃ็ ๆ‰ง่กŒๅ™จ่ฎพ็ฝฎไธบ E2B๏ผŒๅช้œ€ๅœจๅˆๅง‹ๅŒ– `CodeAgent` ๆ—ถไผ ้€’ๆ ‡ๅฟ— `executor_type="e2b"`ใ€‚ ่ฏทๆณจๆ„๏ผŒไฝ ๅบ”่ฏฅๅฐ†ๆ‰€ๆœ‰ๅทฅๅ…ท็š„ไพ่ต–้กนๆทปๅŠ ๅˆฐ `additional_authorized_imports` ไธญ๏ผŒไปฅไพฟๆ‰ง่กŒๅ™จๅฎ‰่ฃ…ๅฎƒไปฌใ€‚ ```py -from smolagents import CodeAgent, VisitWebpageTool, HfApiModel +from smolagents import CodeAgent, VisitWebpageTool, InferenceClientModel agent = CodeAgent( tools = [VisitWebpageTool()], - model=HfApiModel(), + model=InferenceClientModel(), additional_authorized_imports=["requests", "markdownify"], - use_e2b_executor=True + executor_type="e2b" ) agent.run("What was Abraham Lincoln's preferred pet?") diff --git a/docs/source/zh/tutorials/tools.mdx b/docs/source/zh/tutorials/tools.mdx index e62f6b660..9256bd0a3 100644 --- a/docs/source/zh/tutorials/tools.mdx +++ b/docs/source/zh/tutorials/tools.mdx @@ -1,18 +1,3 @@ - # ๅทฅๅ…ท [[open-in-colab]] @@ -133,9 +118,9 @@ image_generation_tool("A sunny beach") ็„ถๅŽไฝ ๅฏไปฅๅƒไฝฟ็”จไปปไฝ•ๅ…ถไป–ๅทฅๅ…ทไธ€ๆ ทไฝฟ็”จ่ฟ™ไธชๅทฅๅ…ทใ€‚ไพ‹ๅฆ‚๏ผŒ่ฎฉๆˆ‘ไปฌๆ”น่ฟ›ๆ็คบ `A rabbit wearing a space suit` ๅนถ็”Ÿๆˆๅฎƒ็š„ๅ›พ็‰‡ใ€‚ ```python -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel -model = 
HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[image_generation_tool], model=model) agent.run( @@ -181,9 +166,9 @@ agent.run("How many more blocks (also denoted as layers) are in BERT base encode ่ฎฉๆˆ‘ไปฌๅฐ† `model_download_tool` ๆทปๅŠ ๅˆฐไธ€ไธชไป…ไฝฟ็”จ้ป˜่ฎคๅทฅๅ…ท็ฎฑๅˆๅง‹ๅŒ–็š„็Žฐๆœ‰ agent ไธญใ€‚ ```python -from smolagents import HfApiModel +from smolagents import InferenceClientModel -model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct") +model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") agent = CodeAgent(tools=[], model=model, add_base_tools=True) agent.tools[model_download_tool.name] = model_download_tool diff --git a/e2b.Dockerfile b/e2b.Dockerfile deleted file mode 100644 index cd6dd29c8..000000000 --- a/e2b.Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -# You can use most Debian-based base images -FROM e2bdev/code-interpreter:latest - -# Install dependencies and customize sandbox -RUN pip install git+https://github.com/huggingface/smolagents.git \ No newline at end of file diff --git a/examples/agent_from_any_llm.py b/examples/agent_from_any_llm.py index 86f45effb..bc421274c 100644 --- a/examples/agent_from_any_llm.py +++ b/examples/agent_from_any_llm.py @@ -1,18 +1,19 @@ -from typing import Optional - -from smolagents import HfApiModel, LiteLLMModel, TransformersModel, tool +from smolagents import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel, tool from smolagents.agents import CodeAgent, ToolCallingAgent # Choose which inference type to use! 
-available_inferences = ["hf_api", "transformers", "ollama", "litellm"] -chosen_inference = "transformers" +available_inferences = ["hf_api", "hf_api_provider", "transformers", "ollama", "litellm", "openai"] +chosen_inference = "hf_api_provider" print(f"Chose model: '{chosen_inference}'") if chosen_inference == "hf_api": - model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") + model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct") + +elif chosen_inference == "hf_api_provider": + model = InferenceClientModel(provider="together") elif chosen_inference == "transformers": model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000) @@ -29,9 +30,13 @@ # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' model = LiteLLMModel(model_id="gpt-4o") +elif chosen_inference == "openai": + # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest' + model = OpenAIServerModel(model_id="gpt-4o") + @tool -def get_weather(location: str, celsius: Optional[bool] = False) -> str: +def get_weather(location: str, celsius: bool | None = False) -> str: """ Get weather in the next days at given location. Secretly this tool does not care about the location, it hates the weather everywhere. 
@@ -43,10 +48,10 @@ def get_weather(location: str, celsius: Optional[bool] = False) -> str: return "The weather is UNGODLY with torrential rains and temperatures below -10ยฐC" -agent = ToolCallingAgent(tools=[get_weather], model=model) +agent = ToolCallingAgent(tools=[get_weather], model=model, verbosity_level=2) print("ToolCallingAgent:", agent.run("What's the weather like in Paris?")) -agent = CodeAgent(tools=[get_weather], model=model) +agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2) print("CodeAgent:", agent.run("What's the weather like in Paris?")) diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb deleted file mode 100644 index 79f0ae0a1..000000000 --- a/examples/benchmark.ipynb +++ /dev/null @@ -1,1195 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "!pip install -e .. 
datasets sympy numpy matplotlib seaborn -q # Install dev version of smolagents + some packages" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Constants and utilities/tools" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Benchmark date\n", - "# - set a concrete date:\n", - "DATE = \"2024-12-26\"\n", - "# - or use default: today\n", - "# DATE = None\n", - "\n", - "# Evaluation dataset\n", - "# - the dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1\n", - "EVAL_DATASET = \"smolagents-benchmark/benchmark-v1\"\n", - "\n", - "# Answers dataset: it must be a gated dataset; required to score the answers\n", - "ANSWERS_DATASET = \"smolagents-benchmark/answers\"\n", - "# Whether to push the answers dataset to the Hub\n", - "PUSH_ANSWERS_DATASET_TO_HUB = True\n", - "\n", - "# Results dataset\n", - "RESULTS_DATASET = \"smolagents-benchmark/results\"\n", - "# Whether to push the results dataset to the Hub\n", - "PUSH_RESULTS_DATASET_TO_HUB = True\n", - "\n", - "\n", - "import datetime\n", - "import json\n", - "import os\n", - "import re\n", - "import string\n", - "import time\n", - "import warnings\n", - "from typing import List\n", - "\n", - "import datasets\n", - "from dotenv import load_dotenv\n", - "from tqdm import tqdm\n", - "\n", - "from smolagents import (\n", - " AgentError,\n", - " CodeAgent,\n", - " GoogleSearchTool,\n", - " HfApiModel,\n", - " PythonInterpreterTool,\n", - " ToolCallingAgent,\n", - " VisitWebpageTool,\n", - ")\n", - "from smolagents.agents import ActionStep\n", - "\n", - "\n", - "load_dotenv()\n", - "os.makedirs(\"output\", exist_ok=True)\n", - "\n", - "\n", - "def serialize_agent_error(obj):\n", - " if isinstance(obj, AgentError):\n", - " return {\"error_type\": obj.__class__.__name__, \"message\": obj.message}\n", - " else:\n", - " return str(obj)\n", - "\n", - 
"\n", - "def answer_questions(\n", - " eval_ds,\n", - " agent,\n", - " model_id,\n", - " action_type,\n", - " is_vanilla_llm=False,\n", - " date=DATE,\n", - " output_dir=\"output\",\n", - " push_to_hub_dataset=ANSWERS_DATASET if PUSH_ANSWERS_DATASET_TO_HUB else None,\n", - "):\n", - " date = date or datetime.date.today().isoformat()\n", - "\n", - " for task in eval_ds:\n", - " file_name = f\"output/{model_id.replace('/', '__')}__{action_type}__{task}__{date}.jsonl\"\n", - " answered_questions = []\n", - " if os.path.exists(file_name):\n", - " with open(file_name, \"r\") as f:\n", - " for line in f:\n", - " answered_questions.append(json.loads(line)[\"question\"])\n", - "\n", - " for _, example in tqdm(enumerate(eval_ds[task]), total=len(eval_ds[task])):\n", - " try:\n", - " question = example[\"question\"]\n", - " if example[\"source\"] == \"SimpleQA\":\n", - " question += \" Answer with only the final number.\"\n", - " if example[\"source\"] == \"MATH\":\n", - " question += \" Write code, not latex.\"\n", - " if question in answered_questions:\n", - " continue\n", - " start_time = time.time()\n", - "\n", - " if is_vanilla_llm:\n", - " llm = agent\n", - " answer = str(llm([{\"role\": \"user\", \"content\": question}]).content)\n", - " token_count = {\n", - " \"input\": llm.last_input_token_count,\n", - " \"output\": llm.last_output_token_count,\n", - " }\n", - " intermediate_steps = str([])\n", - " else:\n", - " answer = str(agent.run(question))\n", - " token_count = agent.monitor.get_total_token_counts()\n", - " intermediate_steps = str(agent.logs)\n", - " # Remove memory from logs to make them more compact.\n", - " for step in agent.logs:\n", - " if isinstance(step, ActionStep):\n", - " step.agent_memory = None\n", - "\n", - " end_time = time.time()\n", - " annotated_example = {\n", - " \"model_id\": model_id,\n", - " \"agent_action_type\": action_type,\n", - " \"question\": question,\n", - " \"answer\": answer,\n", - " \"true_answer\": 
example[\"true_answer\"],\n", - " \"source\": example[\"source\"],\n", - " \"intermediate_steps\": intermediate_steps,\n", - " \"start_time\": start_time,\n", - " \"end_time\": end_time,\n", - " \"token_counts\": token_count,\n", - " }\n", - "\n", - " with open(file_name, \"a\") as f:\n", - " json.dump(annotated_example, f, default=serialize_agent_error)\n", - " f.write(\"\\n\") # add a newline for JSONL format\n", - " except Exception as e:\n", - " print(\"Failed:\", e)\n", - "\n", - " if push_to_hub_dataset:\n", - " ds = datasets.Dataset.from_pandas(pd.read_json(file_name, lines=True), split=\"test\", preserve_index=False)\n", - " config = f\"{model_id.replace('/', '__')}__{action_type}__{task}\"\n", - " data_dir = f\"{model_id}/{action_type}/{task}/{date}\"\n", - " ds.push_to_hub(\n", - " push_to_hub_dataset,\n", - " config_name=config,\n", - " data_dir=data_dir,\n", - " split=\"test\",\n", - " commit_message=f\"Upload {config}\",\n", - " )\n", - "\n", - "\n", - "def normalize_number_str(number_str: str) -> float:\n", - " # we replace these common units and commas to allow\n", - " # conversion to float\n", - " for char in [\"$\", \"%\", \",\"]:\n", - " number_str = number_str.replace(char, \"\")\n", - " try:\n", - " return float(number_str)\n", - " except ValueError:\n", - " return float(\"inf\")\n", - "\n", - "\n", - "def split_string(\n", - " s: str,\n", - " char_list: list[str] = [\",\", \";\"],\n", - ") -> list[str]:\n", - " pattern = f\"[{''.join(char_list)}]\"\n", - " return re.split(pattern, s)\n", - "\n", - "\n", - "def is_float(element: any) -> bool:\n", - " try:\n", - " float(element)\n", - " return True\n", - " except ValueError:\n", - " return False\n", - "\n", - "\n", - "def normalize_str(input_str, remove_punct=True) -> str:\n", - " \"\"\"\n", - " Normalize a string by:\n", - " - Removing all white spaces\n", - " - Optionally removing punctuation (if remove_punct is True)\n", - " - Converting to lowercase\n", - " Parameters:\n", - " - input_str: 
str, the string to normalize\n", - " - remove_punct: bool, whether to remove punctuation (default: True)\n", - " Returns:\n", - " - str, the normalized string\n", - " \"\"\"\n", - " # Remove all white spaces. Required e.g for seagull vs. sea gull\n", - " no_spaces = re.sub(r\"\\s\", \"\", input_str)\n", - "\n", - " # Remove punctuation, if specified.\n", - " if remove_punct:\n", - " translator = str.maketrans(\"\", \"\", string.punctuation)\n", - " return no_spaces.lower().translate(translator)\n", - " else:\n", - " return no_spaces.lower()\n", - "\n", - "\n", - "def extract_numbers(text: str) -> List[str]:\n", - " \"\"\"This pattern matches:\n", - " - Optional negative sign\n", - " - Numbers with optional comma thousand separators\n", - " - Optional decimal points with decimal numbers\n", - " \"\"\"\n", - " pattern = r\"-?(?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d+)?\"\n", - "\n", - " return [el.replace(\",\", \"\") for el in re.findall(pattern, text)]\n", - "\n", - "\n", - "def get_question_score_gaia(\n", - " model_answer: str,\n", - " ground_truth: str,\n", - ") -> bool:\n", - " \"\"\"Scoring function used to score functions from the GAIA benchmark\"\"\"\n", - " if is_float(ground_truth):\n", - " normalized_answer = normalize_number_str(str(model_answer))\n", - " return normalized_answer == float(ground_truth)\n", - "\n", - " elif any(char in ground_truth for char in [\",\", \";\"]): # if gt is a list\n", - " # question with the fish: normalization removes punct\n", - " gt_elems = split_string(ground_truth)\n", - " ma_elems = split_string(model_answer)\n", - "\n", - " if len(gt_elems) != len(ma_elems): # check length is the same\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - " return False\n", - "\n", - " comparisons = []\n", - " for ma_elem, gt_elem in zip(ma_elems, gt_elems): # compare each element as float or str\n", - " if is_float(gt_elem):\n", - " normalized_ma_elem = normalize_number_str(ma_elem)\n", - " 
comparisons.append(normalized_ma_elem == float(gt_elem))\n", - " else:\n", - " # we do not remove punct since comparisons can include punct\n", - " comparisons.append(\n", - " normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False)\n", - " )\n", - " return all(comparisons)\n", - "\n", - " else: # if gt is a str\n", - " return normalize_str(model_answer) == normalize_str(ground_truth)\n", - "\n", - "\n", - "def get_correct(row):\n", - " if row[\"source\"] == \"MATH\": # Checks the last number in answer\n", - " numbers_answer = extract_numbers(str(row[\"answer\"]))\n", - " if len(numbers_answer) == 0:\n", - " return False\n", - " return float(numbers_answer[-1]) == float(row[\"true_answer\"])\n", - " else:\n", - " return get_question_score_gaia(str(row[\"answer\"]), str(row[\"true_answer\"]))\n", - "\n", - "\n", - "def score_answers(\n", - " answers_subsets,\n", - " answers_dataset=ANSWERS_DATASET,\n", - " date=DATE,\n", - " push_to_hub_dataset=RESULTS_DATASET if PUSH_RESULTS_DATASET_TO_HUB else None,\n", - " set_default=True,\n", - "):\n", - " if not answers_dataset:\n", - " raise ValueError(\"Pass 'answers_dataset' to load the answers from it\")\n", - " date = date or datetime.date.today().isoformat()\n", - " results = []\n", - " for answers_subset in answers_subsets:\n", - " *model_id, action_type, task = answers_subset.split(\"__\")\n", - " model_id = \"/\".join(model_id)\n", - " ds = datasets.load_dataset(answers_dataset, answers_subset, split=\"test\")\n", - " df = ds.to_pandas()\n", - " df[\"correct\"] = df.apply(get_correct, axis=1)\n", - " acc = df[\"correct\"].mean().item()\n", - " result = df.loc[0, [\"model_id\", \"agent_action_type\", \"source\"]].to_dict()\n", - " result[\"acc\"] = acc\n", - " results.append(result)\n", - " df = pd.DataFrame(results)\n", - "\n", - " if push_to_hub_dataset:\n", - " ds = datasets.Dataset.from_pandas(df)\n", - " config = date\n", - " set_default = set_default\n", - " ds.push_to_hub(\n", - " 
push_to_hub_dataset, config_name=config, set_default=set_default, commit_message=f\"Upload {config} results\"\n", - " )\n", - " return df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluation dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['gaia', 'math', 'simpleqa']\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
questionsourcetrue_answertrue_reasoning
0What year was the municipality of Ramiriquรญ, B...SimpleQA1541['https://en.wikipedia.org/wiki/Ramiriqu%C3%AD...
1In what year did Hjalmar Hvam invent a mechani...SimpleQA1937['https://www.kgw.com/article/features/portlan...
2In which year did Fayaz A. Malik (an Indian ph...SimpleQA2009['https://en.wikipedia.org/wiki/Fayaz_A._Malik...
3In which year was John B. Goodenough elected a...SimpleQA2010['https://en.wikipedia.org/wiki/John_B._Gooden...
4In which year did Atul Gawande earn an M.A. in...SimpleQA1989['https://en.wikipedia.org/wiki/Atul_Gawande',...
\n", - "
" - ], - "text/plain": [ - " question source true_answer \\\n", - "0 What year was the municipality of Ramiriquรญ, B... SimpleQA 1541 \n", - "1 In what year did Hjalmar Hvam invent a mechani... SimpleQA 1937 \n", - "2 In which year did Fayaz A. Malik (an Indian ph... SimpleQA 2009 \n", - "3 In which year was John B. Goodenough elected a... SimpleQA 2010 \n", - "4 In which year did Atul Gawande earn an M.A. in... SimpleQA 1989 \n", - "\n", - " true_reasoning \n", - "0 ['https://en.wikipedia.org/wiki/Ramiriqu%C3%AD... \n", - "1 ['https://www.kgw.com/article/features/portlan... \n", - "2 ['https://en.wikipedia.org/wiki/Fayaz_A._Malik... \n", - "3 ['https://en.wikipedia.org/wiki/John_B._Gooden... \n", - "4 ['https://en.wikipedia.org/wiki/Atul_Gawande',... " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "\n", - "# Choose the tasks to evaluate on:\n", - "# tasks = [\"gaia\"]\n", - "# or evaluate on all tasks: [\"gaia\", \"math\", \"simpleqa\"]\n", - "tasks = datasets.get_dataset_config_names(EVAL_DATASET)\n", - "print(tasks)\n", - "\n", - "\n", - "eval_ds = {task: datasets.load_dataset(EVAL_DATASET, task, split=\"test\") for task in tasks}\n", - "pd.DataFrame(eval_ds[\"simpleqa\"]).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Benchmark agents\n", - "\n", - "### Open models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "open_model_ids = [\n", - " \"meta-llama/Llama-3.3-70B-Instruct\",\n", - " # \"Qwen/QwQ-32B-Preview\",\n", - " \"Qwen/Qwen2.5-72B-Instruct\",\n", - " \"Qwen/Qwen2.5-Coder-32B-Instruct\",\n", - " \"meta-llama/Llama-3.2-3B-Instruct\",\n", - " \"meta-llama/Llama-3.1-8B-Instruct\",\n", - " \"mistralai/Mistral-Nemo-Instruct-2407\",\n", - " # \"HuggingFaceTB/SmolLM2-1.7B-Instruct\",\n", - " # \"meta-llama/Llama-3.1-70B-Instruct\",\n", - "]\n", - "\n", - 
"\n", - "for model_id in open_model_ids:\n", - " print(f\"Evaluating '{model_id}'...\")\n", - " # action_type = \"tool-calling\"\n", - " # agent = ToolCallingAgent(\n", - " # tools=[GoogleSearchTool(), VisitWebpageTool(), PythonInterpreterTool()],\n", - " # model=HfApiModel(model_id),\n", - " # max_steps=10,\n", - " # )\n", - " # answer_questions(eval_ds, agent, model_id, action_type)\n", - "\n", - " action_type = \"code\"\n", - " agent = CodeAgent(\n", - " tools=[GoogleSearchTool(), VisitWebpageTool()],\n", - " model=HfApiModel(model_id),\n", - " additional_authorized_imports=[\"numpy\", \"sympy\"],\n", - " max_steps=10,\n", - " )\n", - " answer_questions(eval_ds, agent, model_id, action_type)\n", - "\n", - " # Also evaluate vanilla model\n", - " action_type = \"vanilla\"\n", - " llm = HfApiModel(model_id)\n", - " answer_questions(eval_ds, llm, model_id, action_type, is_vanilla_llm=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Closed models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from smolagents import LiteLLMModel\n", - "\n", - "\n", - "litellm_model_ids = [\"gpt-4o\", \"anthropic/claude-3-5-sonnet-latest\"]\n", - "\n", - "\n", - "for model_id in litellm_model_ids:\n", - " print(f\"Evaluating '{model_id}'...\")\n", - " action_type = \"tool-calling\"\n", - " agent = ToolCallingAgent(\n", - " tools=[\n", - " GoogleSearchTool(),\n", - " VisitWebpageTool(),\n", - " PythonInterpreterTool([\"numpy\", \"sympy\"]),\n", - " ],\n", - " model=LiteLLMModel(model_id),\n", - " max_steps=10,\n", - " )\n", - " answer_questions(eval_ds, agent, model_id, action_type)\n", - "\n", - " action_type = \"code\"\n", - " agent = CodeAgent(\n", - " tools=[GoogleSearchTool(), VisitWebpageTool()],\n", - " model=LiteLLMModel(model_id),\n", - " additional_authorized_imports=[\"numpy\", \"sympy\"],\n", - " max_steps=10,\n", - " )\n", - " answer_questions(eval_ds, agent, model_id, 
action_type)\n", - "\n", - " # Also evaluate vanilla model\n", - " action_type = \"vanilla\"\n", - " llm = LiteLLMModel(model_id)\n", - " answer_questions(eval_ds, llm, model_id, action_type, is_vanilla_llm=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# import glob\n", - "# import json\n", - "\n", - "# jsonl_files = glob.glob(f\"output/*.jsonl\")\n", - "\n", - "# for file_path in jsonl_files:\n", - "# if \"-Nemo-\" in file_path and \"-vanilla-\" in file_path:\n", - "# print(file_path)\n", - "# # Read all lines and filter out SimpleQA sources\n", - "# filtered_lines = []\n", - "# removed = 0\n", - "# with open(file_path, \"r\", encoding=\"utf-8\") as f:\n", - "# for line in f:\n", - "# try:\n", - "# data = json.loads(line.strip())\n", - "# data[\"answer\"] = data[\"answer\"][\"content\"]\n", - "# # if not any([question in data[\"question\"] for question in eval_ds[\"question\"]]):\n", - "# # removed +=1\n", - "# # else:\n", - "# filtered_lines.append(json.dumps(data) + \"\\n\")\n", - "# except json.JSONDecodeError:\n", - "# print(\"Invalid line:\", line)\n", - "# continue # Skip invalid JSON lines\n", - "# print(f\"Removed {removed} lines.\")\n", - "# # Write filtered content back to the same file\n", - "# with open(\n", - "# str(file_path).replace(\"-vanilla-\", \"-vanilla2-\"), \"w\", encoding=\"utf-8\"\n", - "# ) as f:\n", - "# f.writelines(filtered_lines)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Score answers" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of answers_subsets 54\n", - "Example of answers_subset Qwen__Qwen2.5-72B-Instruct__code__gaia\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", 
- " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - 
"/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", - "/tmp/ipykernel_640885/2542893079.py:194: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
model_idagent_action_typesourceacc
0Qwen/Qwen2.5-72B-InstructcodeGAIA28.12
1Qwen/Qwen2.5-72B-InstructcodeMATH76.00
2Qwen/Qwen2.5-72B-InstructcodeSimpleQA88.00
3Qwen/Qwen2.5-72B-InstructvanillaGAIA6.25
4Qwen/Qwen2.5-72B-InstructvanillaMATH30.00
\n", - "
" - ], - "text/plain": [ - " model_id agent_action_type source acc\n", - "0 Qwen/Qwen2.5-72B-Instruct code GAIA 28.12\n", - "1 Qwen/Qwen2.5-72B-Instruct code MATH 76.00\n", - "2 Qwen/Qwen2.5-72B-Instruct code SimpleQA 88.00\n", - "3 Qwen/Qwen2.5-72B-Instruct vanilla GAIA 6.25\n", - "4 Qwen/Qwen2.5-72B-Instruct vanilla MATH 30.00" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import datasets\n", - "import pandas as pd\n", - "\n", - "\n", - "# Choose the answers subsets to score:\n", - "# answers_subsets = [\"meta-llama__Llama-3.1-8B-Instruct__code__gaia\"]\n", - "# or get all the answers subsets present in the ANSWERS_DATASET\n", - "answers_subsets = datasets.get_dataset_config_names(ANSWERS_DATASET)\n", - "print(\"Number of answers_subsets\", len(answers_subsets))\n", - "print(\"Example of answers_subset\", answers_subsets[0])\n", - "\n", - "\n", - "result_df = score_answers(answers_subsets)\n", - "result_df[\"acc\"] = (result_df[\"acc\"] * 100).round(2)\n", - "result_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "pivot_df = result_df.pivot_table(\n", - " index=[\"model_id\", \"source\"],\n", - " columns=[\"action_type\"],\n", - " values=\"correct\",\n", - " fill_value=float(\"nan\"),\n", - ").reset_index()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display results" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
action_typemodel_idsourcecodevanilla
0Qwen/Qwen2.5-72B-InstructGAIA28.16.2
1Qwen/Qwen2.5-72B-InstructMATH76.030.0
2Qwen/Qwen2.5-72B-InstructSimpleQA88.010.0
3Qwen/Qwen2.5-Coder-32B-InstructGAIA25.03.1
4Qwen/Qwen2.5-Coder-32B-InstructMATH86.060.0
5Qwen/Qwen2.5-Coder-32B-InstructSimpleQA86.08.0
6anthropic/claude-3-5-sonnet-latestGAIANaN3.1
7anthropic/claude-3-5-sonnet-latestMATHNaN50.0
8anthropic/claude-3-5-sonnet-latestSimpleQANaN34.0
9gpt-4oGAIA25.63.1
10gpt-4oMATH58.040.0
11gpt-4oSimpleQA86.06.0
12meta-llama/Llama-3.1-8B-InstructGAIA3.10.0
13meta-llama/Llama-3.1-8B-InstructMATH14.018.0
14meta-llama/Llama-3.1-8B-InstructSimpleQA2.06.0
15meta-llama/Llama-3.2-3B-InstructGAIA3.10.0
16meta-llama/Llama-3.2-3B-InstructMATH40.012.0
17meta-llama/Llama-3.2-3B-InstructSimpleQA20.00.0
18meta-llama/Llama-3.3-70B-InstructGAIA31.23.1
19meta-llama/Llama-3.3-70B-InstructMATH72.040.0
20meta-llama/Llama-3.3-70B-InstructSimpleQA78.012.0
21mistralai/Mistral-Nemo-Instruct-2407GAIA0.03.1
22mistralai/Mistral-Nemo-Instruct-2407MATH30.022.0
23mistralai/Mistral-Nemo-Instruct-2407SimpleQA30.06.0
\n", - "
" - ], - "text/plain": [ - "action_type model_id source code vanilla\n", - "0 Qwen/Qwen2.5-72B-Instruct GAIA 28.1 6.2\n", - "1 Qwen/Qwen2.5-72B-Instruct MATH 76.0 30.0\n", - "2 Qwen/Qwen2.5-72B-Instruct SimpleQA 88.0 10.0\n", - "3 Qwen/Qwen2.5-Coder-32B-Instruct GAIA 25.0 3.1\n", - "4 Qwen/Qwen2.5-Coder-32B-Instruct MATH 86.0 60.0\n", - "5 Qwen/Qwen2.5-Coder-32B-Instruct SimpleQA 86.0 8.0\n", - "6 anthropic/claude-3-5-sonnet-latest GAIA NaN 3.1\n", - "7 anthropic/claude-3-5-sonnet-latest MATH NaN 50.0\n", - "8 anthropic/claude-3-5-sonnet-latest SimpleQA NaN 34.0\n", - "9 gpt-4o GAIA 25.6 3.1\n", - "10 gpt-4o MATH 58.0 40.0\n", - "11 gpt-4o SimpleQA 86.0 6.0\n", - "12 meta-llama/Llama-3.1-8B-Instruct GAIA 3.1 0.0\n", - "13 meta-llama/Llama-3.1-8B-Instruct MATH 14.0 18.0\n", - "14 meta-llama/Llama-3.1-8B-Instruct SimpleQA 2.0 6.0\n", - "15 meta-llama/Llama-3.2-3B-Instruct GAIA 3.1 0.0\n", - "16 meta-llama/Llama-3.2-3B-Instruct MATH 40.0 12.0\n", - "17 meta-llama/Llama-3.2-3B-Instruct SimpleQA 20.0 0.0\n", - "18 meta-llama/Llama-3.3-70B-Instruct GAIA 31.2 3.1\n", - "19 meta-llama/Llama-3.3-70B-Instruct MATH 72.0 40.0\n", - "20 meta-llama/Llama-3.3-70B-Instruct SimpleQA 78.0 12.0\n", - "21 mistralai/Mistral-Nemo-Instruct-2407 GAIA 0.0 3.1\n", - "22 mistralai/Mistral-Nemo-Instruct-2407 MATH 30.0 22.0\n", - "23 mistralai/Mistral-Nemo-Instruct-2407 SimpleQA 30.0 6.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(pivot_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABdYAAAJOCAYAAAC6HlVrAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA3O5JREFUeJzs3QmcHHWZ//Gnu+dMZjJDJndCAgmSEMkBCZfLTUTFRPiDCIio664grrAXKocou8KiiCwrouCqgMoCXkQSWIQgBJFjIZAQjEFNICHHJJOEOZK5u/v/emqmZqrPqequrq7q/rxfrzFOTU/3r7qLrppv/37PE4rH43EBAAAAAAAAAAC2hO3dDAAAAAAAAAAAKIJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAABAIl1xyicyePVsuvPDCjLf553/+Z+M2V199dd6P99JLLxn3pf+6+Tvmfli/jjzySDn11FPl3/7t36StrU3c0NzcLBdffLHMmzdPTjjhBOnq6nLlfkvVW2+9JTfccIMsWbJE5s+fb7we//Iv/yIbN26UUvHrX//aON62bdtW7KEAAAAAgVdR7AEAAADYFQ6HZe3atUZoPGnSpISfdXZ2ytNPPy1BMHfuXPna17429H1fX5/88Y9/lNtuu03+9Kc/yQMPPCChUCivx7jvvvuM5+pb3/qWTJw4UWpra10YeWl64okn5Etf+pK85z3vkcsvv1ymTZtmHGP6HH7sYx+T73//+/I3f/M3EnT6YcFDDz0kEyZMKPZQAAAAgMAjWAcAAIGhgfRf//pXefzxx+XTn/50ws80VNfweMyYMeJ3dXV1snDhwoRtxxxzjBw4cEC+853vyLp161J+7lRra6sRoJ511ll5jra0bd26Vb785S/LSSedJLfffrtEIpGhn5155ply0UUXGT//3e9+J1VVVRJkY8eONb4AAAAA5I9SMAAAIDBGjRolp5xyihGsJ3vsscfkAx/4gFRUJM4b6OnpkTvvvFM++MEPGmVRNCz9wQ9+ILFYLOF2Dz74oPH7WgbkE5/4hOzYsSPlMXSblgc59thjZcGCBfKpT31KNmzY4Nr+aUkY83FMq1atknPPPdcYu86avvHGG43Z+aY77rhD3v/+98t3v/tdY1wnnniiLFq0yCj7ofejpT/0Nmr37t1yzTXXGM+h7udHP/pReeqppxLGoLfX+9LH1Nvo/9f70sd/5ZVX5LzzzjP+vz5XGjZv3rzZeB70+dBxPProown39/LLL8vf/d3fGR8c6P6dfvrpxnjM51/Lkuhj/u///q9ceeWVctRRRxn78ZWvfCVhP+PxuNx7773yoQ99yBiXPtaPfvQjY7tJx6evnY5F70MD8X379mV9zn/6059Kb2+v8XjWUF3pBzV6H7rP1hI9eqzp86Nj1dfkq1/9asLPdf/0eHvyySdl6dKlxvN19tlny2uvvWasIjj//PONfdCfvfDCCwm/p8+Pfkikv6/7oTPmk0sLaXmaL3zhC3L88cfLe9/7XuNDAT0uuru7R3wdraVg9Ln513/9V2MfzDEuX7484bHefvtt43XR2+iHPVrKaM2aNUM/t/v6AQAAAKWGYB0AAASKzsA2y8GY9u/fL88++6wRVFpp6Pq5z31OfvjDHxph5l133WUEljoz2VqK5Wc/+5nxvQbO3/ve94xA8/rrr0+4Lw0htb67lmzRn3372982wmGtY75p0ybX6nyrgw8+2Ph3xYoV8g//8A8yc+ZM48MBDVMfeeQR+fznP58QKGuAvnr1avnP//xPIzjXsFj3Zfz48UbpD933PXv2GEG6hs9ai15D3KlTpxr3r/dppc/TsmXLjNnzGqCr/v5+I4TV50BLo2jofNVVVxnPr5YY0d/RGfIaRJuvjQbAurKgsbHRGJv+3uL
Fi42QV4NYK33+dTz6/GsQ/8tf/tK4vemWW24xvjR41sfSfbn11luND0nMAF8fq6amxnh9r732Wvm///s/+eQnP5kQOCf7/e9/b6yE0HI56Wh9en2+9LlUOj79cEVDZn1+9Pn77W9/awTO1sfR5+Ab3/iG8fz813/9l7S3txvBs/6uvh76euprqPdt/T09zvQ5/PjHP278nu6PPh9aIsj8cESPOa2Zr/f/3//93/LhD3/YeM1/8pOfjPg6Wn3xi180jl2t7a/3o8+DPvaLL75o/FxXh2gwr+G5BuX6fGuJIv0gRZ9bJ68fAAAAUHLiAAAAAfCJT3zC+Orq6oovXLgwfs899wz97Ne//nX8lFNOicdisfhpp50W//KXv2xsf+aZZ+KHH354fOXKlQn3deeddxrb//znPxu/c8IJJ8T/6Z/+KeE2X/3qV43bvPjii8b3t912W3zevHnxbdu2Dd2mp6cnfsYZZ8SvuOIK43u9rfV3Mu3HxRdfHO/r6xv62rNnT/yxxx6LH3vssfELLrjAGJN+nXzyyfG/+7u/S/j9559/3niMp59+2vj+O9/5jvH9yy+/nHA7fQ70uTDdcsst8fe+970J41ef+tSn4n/zN38Tj0ajxvd6X7rN6le/+pWx/X/+53+Gtj366KPGtttvv31o2/r1641tTz75pPH9ww8/HP/7v//7oftW+v8XLVoUv/76643v33nnHeN3rrrqqoTHvOSSS+JLly41/n9bW1t87ty58ZtuuinhNl//+teHnh993vT2/f39Qz/fvHlz/Igjjoj/7Gc/y/h6LFiwIOW1z6S1tTV+5JFHDo3dpM+97oP5OOZrsnr16qHb3H333ca2X/ziF0PbHn/8cWPbhg0bEn5PnzeTHu/6+phj/P3vf28cPx0dHQlj0H3/zGc+M/R9ttdRn3Ol+/L9738/4bX5xje+EV+zZo3x/T/+4z/GjzvuuITH0uP1Ax/4QPy8886z/foBAAAApYgZ6wAAIFB0Bq/OWraWg9HyI1oiJLnhp86q1dIwOkvd6iMf+cjQz7WUyd69e+W0005LuI3en5WW7DjiiCOMmc06e1u/tJnqySefLM8//7yjfdDZ1VrCw/x63/veZ8xk1lIpOhNe90PHpbOedV/Nx9MvLamiNdr/8Ic/JNynji0b3Vct06GzipOfi5aWFuPxRrov/X1TU1OT8a/O7jfpzHSls7PVOeecY8yE1uasOntdZ3br7OloNGpss0quKa/Nac1SIrpCQfddy/hY6SxqXY2gs7e1Lr3O0tdZ4OZzpTP/Z82alfJcWWn5Fx2PHToOLRuTvDJCZ+Hr85o8i/voo48e+v/jxo0b8flSerxa71+Pdz3G9JhRWupHV1hUV1cbM8q1lI/ODNeZ7jo2J8fEcccdZ6xc0Jn0v/jFL4xVDTpj3Ry37o/+d6HHm3V8OkP+jTfeMHoC2Hn9AAAAgFJE81IAABA4GnprWRQNnjVg1ND7n/7pn1Jup3WvDzrooJTa2WZZj46OjqHa2Hq7dLexNgPdsmWLEYSno+GuXXofWn5DaYiu+zB58uSEAFMfT+ntzNtaaUkQq9GjR2d9TN1Ps8SMlRn4WsNdrWWfjnV8Ji0Jk4mWOPn6178uv/nNb4yge9q0aUY4r+GstZRNuvvRDy3M25jPRabGmzp2LcujIb5+JdPnN5MpU6akradv0g8A9LnT58k8VsznzEq36fGUz/Nl3k9ynwD9EMN8DnQ/b7vtNrn//vuN4FqPG62hnm4fM72OJi3Po+VitCyPfuihz7l+yPPv//7vxgcF5n6nG6O+NlqCyc7rBwAAAJQignUAABA4OoNXg2Sdta7hoQa2ZuNPq4aGBnn33XeNGcnWcN0MpTVMNwN1nbVuZQaZpvr6eqMp45e+9KW0Y6qqqrI9fh27NovMZsyYMca/+nj6uOn2zQm9vc5MT2ZuS/5gwQ033XSTEdhqzXMNbM2gV+uWO2E+FzorW+vNmzQQ37p1q/Ha6wc
UWmNdZ1M7CbN1Bvh9991nPA/JH6YorV2vddS1Lrz5nOvMbus4lP5+ug8unEo+7szHM1cIaE15beKqH7boDH49LpXWnHdKf1frrOuXrljQ2e9aI13vWx9H91cfO9sxk/wBDwAAAFAuKAUDAAACR0PsJUuWGKGtzrZNF6YqDaR1prS1bIwym3UuWrRIDjnkEGPWb/Jtnn766ZT70uaihx56qBGKm186G1sbNSbPis+XBrcapmrjSOvjaSkaLRezYcMGR/enJWRee+012b59e8pzoYHyjBkzxG1r1qwxyo3oa2WG6lpCRANynXltl87IrqysTHlNfvzjHxsldPS+tfGmhsPW5+o973mPUerkpZdeynjf2ghU71s/BEguCaMzwrV0jQbI+mGOlnHRY2/lypUJt9OGsBryW0u/5Epn+WtDVev32pjX/DBCn9PDDjtMzjvvvKFQfdeuXfLnP//Z0XOqx4GWzjGPez3ePvvZzxofgJgz+PWY0efcOjNdnyMtvaTPr5MPkwAAAIBSw4x1AAAQSGeddZZcdtllRskJrbWdjoahGuzqzzV8nDNnjlE3WsuF/L//9/+MgFJdddVV8q//+q/G7bQeu9bSfuCBBxLuS2dDa4iu/37mM58xwtbHHntMfv7zn8s111zj+v5pUP/P//zP8tWvftX4/1rrWkue6Ixi3ZdMJWky+du//VsjRNfxaxkdre+9fPlyefHFF+U//uM/jOfRbRqI6wcf+lxqrXOts671wHV2uZPSOVoC5pOf/KQxU1vDXP2QQ2uq6/3qjH4duwbsl156qfE6at14DYA1eNfbff7zn89437ra4YYbbpDrrrvOCNkvvPBC44MWnQl/zz33yDvvvCM/+tGPjFIr+qWPceeddxphvL4m+sHHf/3XfxnHkh5TbtDjSUsb6Qcr+tga8F9++eVDz6keAzqjXOuaa3miu+++26iv7uQ51VIvWgf9xhtvNILz6dOnGx966Ax9/e9K6XGiob4+97rfus9a312fE61tDwAAAJQzgnUAABBIOrNWS4RoCKqhbToa4GroqLOONZTVmdIapGoIq0GzSZtFajirgaWG54cffrhRZ1pvZ9KZ4g8++KAxW1yD2J6eHmO2u850zqUMhx3nn3++UTZGQ8yHHnrImJmts6JvvfVWx2VHdFa6BtE6fg1TtXa4ftCg+3zGGWcUZPxXX3218ThaCkaDX33uNSDWppu/+93vbDcNVVquRINmfQ30+dD7uv76640g3CzpoiG0lmzRZpwaAuuHDxqOJzfWTKaBuM7Y15IwOlYtC6TPlz7XOuPdenxdccUVRo1xDZj1NdEPKPTDGA3CR6ppbpceX/phhx6vOgZ93cwVBRp6a3mjn/zkJ0bAr8f/2WefPXSs64cvZumckehzpfXa9YMBvU+9Lw3TNURXOuP/f/7nf4zbaNivj6HBvj62NmwFAAAAylkoTlchAAAAoOg0xNew+8033yz2UAAAAACMgBrrAAAAAAAAAAA4QLAOAAAAAAAAAIADlIIBAAAAAAAAAMABZqwDAAAAAAAAAOAAwToAAAAAAAAAAA4QrAMAAAAAAAAAUG7B+ic+8QnjCwAAAAAAAACAQquQErBz585iDwEAAAAAAAAAUCZKYsY6AAAAAAAAAABeIVgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAPC59u4+2d3ebetLb5uLzs5Ouf322+WDH/ygzJ8/X4477ji58sor5S9/+UvKba+++mqZPXu2bN26NeVnl1xyidxxxx2OfidoKoo9AAAAAAAAAAAodcvueM72bVdccWLC9xqU/+T5t6WnPyZ79vdIXzQu4+uqpKoikvK7etuKcEi
uOOM9Mqam0vZjHjhwQD7+8Y8b4boG4HPmzJF3331X7r//frnwwgtl+fLlcvDBBxu37enpkSeffFKmT59ubNfwfSS5/I6fEawDAAAAAAAAgI9190aNUD0SFmkcVSUfnjdZJtRXp9zu1a3vyrN/3iNSGTZ+x0mwfuedd8revXvlsccekzFjxhjbpk6dKjfffLPs3LlT7r33Xrn++uuN7atXr5bKykojiP/pT38qV1xxhYRCoaz3n8vv+BmlYAAAAAAAAADA53Smek9/XD51wiFy5NQGmTCmJuHrrT0H5E87O+Tkw8c5CtRVLBaThx9+WP72b/92KFS3uuWWW+SLX/zi0PcrV66UxYsXy2mnnSbbt2+Xl19+ecTHyOV3/IxgHQAAAAAAAAB8Tsu/6Ez1SQ01KT97afNeeX7TXnnfrCY5evpBju9ba57v27fPCL7TmTBhgtTU1AyVjFm9erURkB9yyCEya9YsI5TPJpff8TuCdQAAAAAAAADwOa2pnq78izVUP25mU073rbXUVUNDw9C2559/Xo466qihrw9/+MPG9lWrVklfX58Rkqv3v//98tvf/la6uroy3n8uv+N3BOsAAAAAAAAA4HPpGpW6Eaors/xLe3v70DYN07XJqH59/vOfHwrBH330UTn66KNl7NixxvdnnnmmMSP9iSeeyHj/ufyO39G8FAAAAAAAAAACxq1QXc2YMUMaGxvltddek/nz5xvbamtrje2qqalpaGa7zmTv7++XuXPnJtyHBvBnn312yn3n8jtBQLAOAAAAAAAAACUUqvf2Rx3dX0VFhZx33nly3333Gf/W1dUl/HzXrl3GvzrDXBud3n///VJfXz/0c62Xfu+990pzc7NMmjQp4Xdz+Z0goBQMAAAAAAAAAJRIqL67o0da9vc6vt8rrrhCxo8fLxdeeKE8/vjj8s4778jrr78u119/vXznO9+RRYsWycqVK+Wkk04y/v/hhx8+9PXpT39awuGw/OY3v0m531x+JwgI1gEAAAAAAAAgAJ55c7c8uWGXHDG5Xg4dN1p2t3cnfL2xvU1+seYdqYyEHN+3ln756U9/apRm+d73vidLly6Vv/u7v5MdO3bIHXfcIVdddZW88sor8tGPfjTldydOnChnnHGGMQs9eaa7098JilA8Ho9LwOkLoJ566qliDwUAAAAAAAAAXNXe3Sc/WL1ZdrZ1yZjaShlTU5m2/IvOVNdQfUpDrXzqbw5Jezu4gxrrAAB47IKVF7hyPw8tfciV+wEAAAAA+JsG5JeeMlO6e+3VTq+pihCqFxjBOgAAAAAAAAD4nAblhOX+QY11AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcqHByYwAAAAAAAABAEXS3ifR12bttZa1ITYPtu549e7bx79NPPy1TpkxJ+NkDDzwgN9xwg3zhC1+QK664IuFnp59+usTjcfnd734noVBoaNv27dszPtabb74pV199tfH/v/GNbyT8bNu2bXLGGWfIU089JdOmTRM/I1gHAAAAAAAAgEK7+xT7t71sdWqo/tLdIrFo4vZ4TGT/bpFYn8joCSIV1QPbIxUix17qKFyvrKw0AvJPfOITCdtXrVo1FJpbvfbaa9Ld3W18vfTSS3L88ccb23/5y19KNDowzptuusn497rrrpNSQ7AOAAAQYBesvMCV+3lo6UOu3A8AAACAAtCZ6hqqH7FMZPS4gW39vSJ/+s1AqD73HJH6SQPbD+wR+dOKgd9xEKwvXrw4JVjfv3+/EaDPnTs35faPPvqo8Tt9fX2yfPnyoWB97NixQ7epqakx/h0/fryUGmqsAwAAAAAAAEAQaKiuAXrtQSJvPSMS7R2YmT5l4cB2/TKDd4e0BMv//d//GWG66ZlnnjHC89GjRyfcNhaLyeOPP2787LTTTpPf/va30tnZKeWEYB0
AAAAAAAAAgqK/R+T1h0QOtIgsuEhkTGJN9KHSMQ4dfvjhMnHiRHn22WeHtj355JOyZMmSlNu+9NJL0tLSYoTq+qXlYJ544gkpJwTrAAAAAAAAABAEWv5lpFD9nZdFultznrWu5WBUb2+v/OEPfzC2JVu5cqXR8PTggw82yrwsXLhQHn74YUePtWLFCjnqqKMSvpYuXSpBQY11AAAAAAAAAPA7bVSqNdW1/EumUP3tP4hsfVGkpjGnh9AQ/corr5T+/n554YUXjFnsTU1NCbfRwP3JJ59MqMV+5plnyje/+U3ZsWOHTJmSZlxpnH766XLVVVclbNu1a5dccsklEgQE6wAAAAAAAADgd/t3DzQq1ZrqmUL1t54VmX68yK43cnqIRYsWGf+uWbNGVq1aJe9///tTbvP73/9e2tra5Pvf/77cddddxrZ4PG58/eY3v5HLL7/c1mNp3fYZM2YkbItEIhIUlIIBAAAAAAAAAL/TUH3uOdlD9UNPFjn4mJwfoqKiQk455RSjHMzTTz+dtr76Y489JjNnzjRC9OXLlxtf+v+POeYY4/+XC4J1AAAAAAAAAPC70RNE6idlD9UP+Zu8H0bLwfziF78wSsBoDXWrrq4uI3T/6Ec/apSJsX5dfPHF8vbbb8trr70m5YBSMAAAAAAAAADgdxXVIgf2pDYq1ZrqWv6laZZIR3PqbRw68cQTjRrr6Wara6je19cn55xzTsrP9PbayFSbmGoj0lIXimvxm4AzO9M+9dRTxR4KAAAjumDlBa7cz0NLH3LlfhBsHE8AAABAGehuE/m/H4hE+xO3dbcONCqtaUi8faRioBZ78na4hhnrAAAAAAAAAOBnGpBrUN7XZe/2lbWE6gVGsA4ANjErFAAAAAAAFI0G5YTlvkHzUgAAAAAAAAAAHCBYBwAAAAAAAADAAYJ1AAAAAAAAAAAcIFgHAAAAAAAAAMABgnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAACf6+jtkJbOFltfelun+vr65I477pAzzjhDjjzySDn11FPl5ptvlv379xs/P/300+XXv/616/t19dVXG19O/OIXv5Bzzz1XFi5cKCeeeKLx+++8807a2+r22bNnyxe/+EVxU4Wr9wYAAAAAAAAASHHBygts3/ahpQ8lfK9B+QMbH5DOvk7Z07VHKsOV0lTbJOFQ4rzpWDwme7v2Sl1VnVw6/1Kpr6q3/Zi33nqrPP/883LjjTfKwQcfbATSN910k2zZskXuuusu+eUvfymjRo2SYvvKV74iTz/9tFx11VVyzDHHyJ49e+SHP/yhnH/++XLfffcZIbrVY489JtOnT5dVq1bJgQMHZPTo0a6Mg2AdAAAAAAAAAHysu7/bCNW7+rvkPQe9R5ZMXyJVkaqE2/RGe2XV1lVGCB8JRYzfcRKsP/zww/If//EfcsIJJxjfT5s2TW644Qa5+OKLZffu3TJhwgQpttWrV8tvfvMbY+b8e97znqFx6kz7f/iHf5Brr71WfvWrXyX8zsqVK+UTn/iE3HnnnfLb3/7WmOnuBoJ1AAAAAAAQ+NmdTmZ+AkAQ6Ux1DdUvmnNR2lB95eaV0hfrk7NnnS3PbHvG8f2HQiF58cUXjZIv4fDATPijjjpKHn30UTnooIOM7V/4wheMYPqSSy4xSsU899xzsmbNGjn88MPl29/+tjFzXIPs8ePHGzPfjz32WHnppZeMMix///d/b4TbkUjE+P3LL7887TiefPJJ+c///E/Zvn27EZ5/6UtfMu5H/fznP5clS5YMherWsWuwrmP705/+JEcccYSx/a9//av8+c9/luOOO05ef/1148MDt4J1aqwDAAAAAAAAgM9p+ZdMM9U1VN/XvU+WzVwm40eNz+n+P/nJT8pPf/pTI0D/2te+Zszu7u7ulsMOO0wqKytTbn/nnXfKxz72MWP2eEdHh3z0ox+VcePGGSVjNPjWYN20d+9eWb58ufz
4xz+Wf//3fzcCeA3Jk23cuFG+/OUvG6H7I488Ih/5yEfks5/9rFGORq1bt07mz5+fdvxz586V2tpaI0A3acg/depUmTNnjlE7/uWXXzYCezcQrAMAAAAAAACAz2lN9ZFC9YmjJ+Z8/zrj+1vf+pZMmjTJCL2vvPJKOemkk1JKq5hOO+00+dCHPmQE7zqLvK6uzvidWbNmGYH75s2bh27b399vlJl573vfa9z2U5/6lDz44IMp9/mjH/3I+N1ly5bJjBkzjLD/5JNPlgceeMD4eWtra8Ya6Tprvb6+Xt59992E+ur6QYE65ZRTpKqqygj43UCwDgAAAAAAAAA+l9yo1M1Q3aQzxDXw1iam2sxUZ55fd9118sYbb6Tcdtq0aUP/v6amRqZMmWKE2+b3fX19Qz/Xpqc6a9x05JFHJgTvpk2bNsnPfvYzowSN+aWNSt9++23j542NjbJr1660Y4/H47J//34jXFc6c11numuQrzSQf9/73mfUaHcDNdYBAAAAAAAAIEDcDtW1BIvO5L766quN77Wmus4a/8AHPiBnnnmmUXs9WUVFYrRs1mVPJ/m2sVhsKIS3ikajRumXc845J2G7BvVKy8CkC/nVm2++KZ2dncaseKW14dVnPvOZhMfVAF7rwi9atEjywYx1AAAAAAAAACihUL2jt8PRfWqgfc8998iGDRsStmvpFA21x44dm9eY29vbZdu2bUPfr1+/XmbPnp1yu0MPPdS4nZaBMb8eeughefbZZ42fX3DBBbJ69eqhOupaGkaD/xUrVsj3vvc9o4nqggULjAD9f//3f+Xss882PjAwv7R5qZascaMcDME6AAAAAAAAAJRIqL6uZZ2097Y7ul+d5X3qqafK5z//eSOk1nB77dq1RhPT3t5eI7zO1/XXXy9//vOfjaao2iT14osvTrnNpz/9aaMu+k9+8hPZunWr3HvvvcbXIYccMlQnXWuwf+5znzNCcm2aquH5VVddZdzvtddea8yEf+WVV4ySMZdccokRtptfRxxxhFHuRkP3np6evPaHUjAAAAAAAAAA4HOxeEx+s+k30hfrk/dPf79Rc72lsyUlVH9hxwsypmqM4/u//fbb5a677pLvfve7smPHDqMu+oknnmjUPNdZ3vk6+eST5eMf/7hxv//yL/9ilJpJtnDhQrnlllvkjjvuMP6dPn26fPvb35Zjjjlm6Db//u//btRo1/D9hhtuMMamTVQ1UP/yl79sbHvmmWeMGfHz5s1LeYyLLrpI/ud//kdWrVolH/7wh3Pen1Bci8oE3BlnnGH8+9RTTxV7KABK2AUrL3Dlfh5a+pAr94Pg4liCmzieAACljPMcAAyXdvnh+h/K3q69Mq52nFRFqtLeRmeqa6h+UM1BctGci6S+aqCRZzG99NJL8slPftKogV5ozz33nEQiETnhhBMK/ljMWAcAAAAAAAAAH9OA/O/n/b1093fbun1NRY0vQnWv6Qx7rxCsAwAAAABQzu4+xZ37uWy1O/cDAEhLg/JyDMv9iualAAAAAAAAAICCOO644zwpA+M1gnUAAAAAAAAAABwgWAcAAAAAAAAAwAGCdQAAAAAAAAAAHCBYBwAAAAAAAAAgKMH6zp075bLLLpOjjz5aTj/9dLn33nuHfrZhwwY5//zzZcGCBXLeeefJG2+8UcyhAgAAAAAAAABQ/GD9n/7pn2TUqFHy61//Wq699lq5/fbb5cknn5TOzk659NJLZfHixcbPjjrqKCOA1+0AAAAAAAAAAJRlsN7W1iZr166Vyy+/XA455BBZsmSJnHTSSfLCCy/IY489JtXV1fKlL31JZs2aJdddd52MHj1aHn/88WINFwAAAAAAAACA4gbrNTU1Ultba8xI7+vrk82bN8urr74qRxxxhKxbt04WLVokoVDIuK3+q+ViNIgHAAAAAAAAAKCYKor1wDoj/atf/ap8/etfl5/85CcSjUbl3HPPNeqqP/XUU3LYYYcl3L6pqUn+8pe/ZL3
PeDye8L0G8snbCr29GI/JPrFPfhtLqe6TW/y0T6X4OgVhn9zkl30qxdcpKPvkFj/tk9+3+2ks7BP7FPTtfhoL+zS4Xc8JNral3T54f26O0S0l9zqV4rHHPgVyu5v/nQJBU7RgXW3atElOO+00+du//VsjNNeQ/YQTTpCuri6pqqpKuK1+39vbm/G+9D9qLS9jvb3Wb9f7sv6ezpTXrwMHDkh/f//Qdr2t/s7+/fuNkN+kJWgqKyulvb094Y2jvr5ewuFwwmOqhoYGicVi0tHRkfAmo9v18fRxTZFIxLgfnbFvrR9fUVEhdXV10tPTI93d3ewT+8Q++WSfVCwaSxh7OBI2xmTsp+WaQ8ejf2lE+4f339heEfHVPpXi6xSEfVJ6//GY5VgKhyUUDmU8xtJtV37Zp1J8nYKyT3q/enxYb6/Hhx5fel9D28Mh4zEzHXt+2qdSfJ3YJ/aJfWKf/LxPdYM/0/HoHlpva26XNNsr9Pbx4fPN/rY21/fJybVRtuvyUnidSvHYY5+Cv0+NjY0J+weUk1A83cdPHtBa6tq8dPXq1cZ/3Or73/++PPLII3LwwQfL4YcfLlddddXQ7b/1rW8ZQfxdd92Vcl9nnHGG8e+qVasStvPJI/vEPgV7u5/Goi589EJxw4MffrBgY+R1CsY+uXUsPbT0Id/sUym+TkHZpwtWXiB+fm/y0/PLPrFPQd/up7GwTyW2Tz841Z0Z65c+4/oYC3We88Xz7vJ2P42FfSqffdLvgXJVtBnrb7zxhsyYMWMoVFdz5841gvPFixfLnj17Em6v30+YMCHrfab7jznTf+CF3F6Mxyz0dj+Nxa3tfhqLW9v9NBa3tvtpLG7x2z6V4usUhH1yi5/2qRRfpyDsk1v8tE9B2O6nsbi13U9jcWu7n8bi1nY/jcWt7X4ai1vb/TSWnLbb3Jay3XJ/Qbhm8t3zzrHHPgV8O1Buita8VEPyLVu2JCxN0Qam06ZNkwULFshrr7029AmY/quNTXU7AAAAAAAAAABlGayffvrpRr2mr3zlK/LWW2/J7373O2O2+iWXXCIf/OAHjTpON910k/z1r381/tX6UB/60IeKNVwAAAAAAAAAAIobrGvDg3vvvVdaWlrkox/9qNx8881y+eWXywUXXGA0YLj77rtlzZo1cu6558q6devkBz/4wVDzQAAAAAAAAAAAyq7GujrssMPknnvuSfuz+fPny8MPP+z5mAAAAAAAAAAA8OWMdQAAAAAAAAAAgohgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwoMLJjQGg4O4+xZ37uWy1O/cDAAAAAAAAJGHGOgAAAAAAAAAADhCsAwAAAAAAAADgAME6AAAAAAAAAAAOEKwDAAAAAAAAAOAAwToAAAAAAAAAAA4QrAMAAAAAAAAA4ADBOgAAAAAAAAAADhCsAwAAAAAAAADgAME6AAAAAAAAAAAOEKwDAAAAAAAAAOBAhZMbAxndfYo793PZanfuBwAAAAAAAAAKhBnrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOs
AAAAAAAAAADhQ4eTGQNBcsPICV+7noaUPuXI/AICAuvsUd+7nstXu3A8AAAAAoKiYsQ4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAwTrAAAAAAAAAAA4QLAOAAAAAAAAAIADBOsAAAAAAAAAADhAsA4AAAAAAAAAgAME6wAAAAAAAAAAOECwDgAAAAAAAACAAxVObgwAQGDcfYp793XZavfuCwAAAAAABB4z1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAAAABwjWAQAAAAAAAABwgGAdAAAAAAAAAAAHCNYBAAAAAAAAAHCAYB0AAAAAAAAAAAcI1gEAAAAAAAAAcIBgHQAAAAAAAACAoATrvb298m//9m9yzDHHyPve9z657bbbJB6PGz/bsGGDnH/++bJgwQI577zz5I033ijmUAEAAAAAAAAAKH6wfuONN8rzzz8vP/rRj+Tb3/62/PznP5eHHnpIOjs75dJLL5XFixfLr3/9aznqqKPksssuM7YDAAAAAAAAAFBMFcV64NbWVvnVr34l99xzj8yfP9/Y9pnPfEbWrVsnFRUVUl1dLV/60pckFArJddddJ88++6w8/vjjcu655xZryAAAAAAAAAAAFG/G+po1a6Surk6OPfbYoW06S/3mm282wvVFixYZobrSf48++mhZu3ZtsYYLAAAAAAAAAEBxZ6y/8847MnXqVFm+fLncdddd0tfXZ8xGv/zyy6WlpUUOO+ywhNs3NTXJX/7yl6z3adZnN2kgn7yt0NuL8Zi+2yd9LVK22txuuT+39sktfnk9Mm3301hc26eRjplYVCQeS3/79p32H7OyVqSmYcTbu8Uvz69b2/00Frfem1K2Dd6f396XBoZWYq9TEI69Ap7nchmjW0rudSrFY499Yp98NBb2qYT3yc61kUfXTIU8z/nueefYY58Cut3N/06BoClasK710rds2SIPPvigMUtdw/SvfvWrUltbK11dXVJVVZVwe/1em51mov9Rt7W1Jdx+1KhRxn1Zf6+mpsb4OnDggPT39w9t19vq7+zfv1+i0ejQ9tGjR0tlZaW0t7cnvHHU19dLOBxOeEzV0NAgsVhMOjo6Et5kdLs+nj6uKRKJGPejHypY68drKRydzd/T0yPd3d2B2Ke6wfvX7ZFw2Lhf/R3r7bNt1236s/2Dj+3WPhlXeSGRaP/wbY3nviJi/Mx6H3o7fU2MMUYTx6hK4XUKwrFnHkvmOFXC66Tj1Ncp2i/x3gMJJ/FQRbVIpMq47/4XfiAS7ZVw1x6pCIeM5yYWqpT4qHEiobDxeBWRsETbdkq0ola6jr1CpHpM1n1SemxYxx6OhI0xGGO0HHrG2DMce6XwOgXh2Ku17JOOe+h1sr4eeiylOcaSjz19b3Jzn5TxvheLJ45Rj9UMx1i67Sror1NQjj3zvck8lszzVsLrl2W79Vhye5/Snbf0+NDjK+GcGw4Zj5np2CuF16kUjz32iX1in9gnL/bJPM/ZvTYaGqf599Pg+UbPc27vk5Nro2zX5aXwOpXiscc+BX+
fGhsbE/YPKCeheLqPnzzwgx/8wGhY+rvf/c6Yua7uvfdeeeCBB2TGjBly+OGHy1VXXTV0+29961uyadMmY3Z7sjPOOMP4d9WqVQnb+eTRw336wanuzOS79BlXx37hoxeKGx5a+pBvXo9M2/00lry22z2Won0i/d0idRONMF2aZkmocfrw7WeeKrLhNyL9PRKSkMTHTBY54iMikcqBn0f7JLRxhcT3/GXgfs74mkj9pKxjdOt4evDDD2be/wBu99NYMh1Lec9YH3xvcmuMxXhvKvrrUYDtnj6mR+e5XMZ4wcoLxM/vTX46Ztgn9ino2/00FvapxPZphPOc7XOfy9dMhTzP+eJ5d3m7n8bCPpXPPun3QLkq2oz18ePHGw1KzVBdHXroobJz506j7vqePXsSbq/fT5gwIet9pvuPOdN/4IXcXozHLPR2x/eRdquN7Un359Y+ucFPr0em7X4ai1vbRzxmNFSfPF9k7Mzh7d1tIpt+N/Az/Wo8WELzPiZSMbgSpr9XZP3PRbrbJVQ52pjZbhx71tnvZX4sOd3up7Fk3Z52a/rtCdtsHBvFeF9y+ri+ez2CfOwV8DwXhGPJ6XY/HTNubffTWNza7qexuLXdT2Nxa7ufxuLWdj+Nxa3tfhpLTtttbgv6NZPvnneOPfYp4NuBclO05qULFiwwlqy89dZbQ9s2b95sBO36s9dee23oEzD999VXXzW2A0CCpllDobqYofq2l42Z6obGg0XSheqt7wx8r9vrsn9oBwAAAAAAAPgiWJ85c6aceuqpcs0118jGjRvl97//vVEe5qKLLpIPfvCDRh2nm266Sf76178a/2p9qA996EPFGi4APzJmo09PDdW1TIytUL1aZO45IpHqIgweAAAAAAAAQVW0YF3deuutMn36dCNM//KXvywXX3yxXHLJJUYDhrvvvlvWrFkj5557rqxbt84I3c3mgQAwFKznE6ovuEikfmIRBg4AAAAAAIAgK1qNdbOb8C233JL2Z/Pnz5eHH37Y8zEBCKDkUH3UWHuhujY07Wgu3rgBAAAAAAAQSEUN1gEgbz0dIi0bE0P1qYvsherWYB4AAAAAAACwiWAdQHDFYyI714qEIomherjCfqi+7RWR7tYiDB4AAAAAAABBVdQa6wCQl74ukWh/7qH6lhcGvgAAAAAAAAAHCNYBBFg8fage67cXqm9+pghjBgAAAAAAQNARrAMIttrG1FB9+xpnoXpNo8eDBgAAAAAAQJARrAMILq2tPml+aqjeuc9+qD7jBJGaBo8HDgAAAAAAgCAjWAcQXJU1IuFI7qH6zFNFpi32eNAAAAAAAAAIOoJ1AAEWSh+qRyrtheo6Wx0AAAAAAABwiGAdQLDFoqmh+rRjnIXq0R4PBwwAAAAAAICgGyxMDABBFBdpfl2krysxVLfWTB8pVO/YJbJ/t4djBgAAAAAAQNAxYx1AcPV1i3S15h6qt+8U2bBcJB7zcNAAAAAAAAAIOoJ1AMEVj+YXqq97QKS/18MBAwAAAAAAoBRQCgZAsEUqUkP1fZtFOppthOqDtdUrqj0cMAAAAAAAAIKOGesAAiwkMnlhaqje8qb9UL1hisjoCR6OGQAAAAAAAEFHsA4guCprRarrcw/VGw8WmbNMJMRbIQAAAAAAAOwjTQIQXNZAPJdQfd7HRCqqPBwwAAAAAAAASgHBOoDgSw7Vx892FqrHYx4PGAAAAAAAAEFG81IAwda6VaRtW2KoPnam/VC9v1fkwG6PBw0AAAAAAIAgY8Y6gOCK9ors3ZRfqL5xxfDPAQAAAAAAABsI1gEEO1jPJ1Rf/3ORth0eDxoAAAAAAABBR7AOIPiSQ/XuNnuheus7qU1QAQAAAAAAgBGQJgEItqZZqaH6tpfth+q6vW5CEQYOAAAAAACAoCJYBxBckSqRxumpoXq0z2aoXi0y9xyRSHURBg8AAAAAAICgqij2AAAgr2A9n1B9wUUioVARBg4AAAAAAIAgY8Y6gOBLDtV
HjbUXqo+ZXLwxAwAAAAAAILCYsQ4g2Ho6RFo2JobqUxc5C9U1mAcAAAAAAABsIlgHEFzxmMjOtSKhSGKoHq6wH6pve0Wku7UIgwcAwF8uWHmBK/fz0NKHXLkfAAAAwM8oBQMguPq6RKL9uYfqW14Y+AIAAAAAAAAcIFgHEGDx9KF6rN9eqL75mSKMGQAAAAAAAEFHsA4g2GobU0P17Wucheo1jR4PGgAAAAAAAEFGsA4guLS2+qT5qaF65z77ofqME0RqGjweOAAAAAAAAIKMYB1AcFXWiIQjuYfqM08VmbbY40EDAAAAAAAg6AjWAQRYKH2oHqm0F6rrbHUAAAAAAADAIYJ1AMEWi6aG6tOOcRaqR3s8HDAAAAAAAACCbrAwMQAEUVyk+XWRvq7EUN1aM32kUL1jl8j+3R6OGQAAAAAAAEHHjHUAwdXXLdLVmnuo3r5TZMNykXjMw0EDAAAAAAAg6AjWAQRXPJpfqL7uAZH+Xg8HDAAAAAAAgFJAKRgAwRapSA3V920W6Wi2EaoP1lavqPZwwAAAAAAAAAg6ZqwDCLCQyOSFqaF6y5v2Q/WGKSKjJ3g4ZgAAAAAAAAQdwTqA4KqsFamuzz1UbzxYZM4ykRBvhQAAAAAAALCPNAlAcFkD8VxC9XkfE6mo8nDAAAAAAAAAKAUE6wCCLzlUHz/bWagej3k8YAAAAAAAAAQZzUsBBFvrVpG2bYmh+tiZ9kP1/l6RA7s9HjQAAAAAAACCjBnrAIIr2iuyd1N+ofrGFcM/BwAAAAAAAGwgWAcQ7GA9n1B9/c9F2nZ4PGgAAAAAAAAEHcE6gOBLDtW72+yF6q3vpDZBBQAAAAAAAEZAmgQg2JpmpYbq2162H6rr9roJRRg4AAAAAAAAyq55aUdHhzzyyCPy1ltvyec//3lZt26dzJo1S6ZPn+7uCAEgk0iVSOP01FA92mczVK8WmX2WyJuPFWHw8JVYVCQey/zzjmZ791NZK1LT4NqwAAAAAABACQXrf/7zn+VTn/qUTJ48eej/P/HEE/L444/L3XffLccee6z7IwWAdMF6PqH6gotEQqEiDBy+C9V7OkTCkYFjynpcadje1yWy6obh42b0hOHyQfrzA7uHV0iMHidy8hcJ1wEAAAAAKHE5Bes33nijXHTRRXLllVfKUUcdZWy7+eabZezYsXLLLbfIL3/5S7fHCQCZJYfqo8baC9XHTLY/ExmlS8NxDdUPPm6gXr9Jw/ada0Wi/SK1TSINU0TmLEs8rjauEOntFKmsE4n1i4QrBoJ4gnUAAAAAAEpaTjXW169fL+ecc07K9gsvvFD++te/ujEuALBHw8/kUH3qInuhujWYR3nTWeoaqmsgbobiLRtFQpGBY2biESLHfFbkoOki9ZNEaseKvP2sSHe7SHWdyOgmkYUfF4lUF3tPAAAAAACAX4N1nZmutdWTvfrqq9LU1OTGuADA3kxjY0ZxUqius4bthurbXhHpbi3C4BGYskJ2V0DUTyzCwAEAAAAAQGBKwXz2s5+Vr3zlK/K5z31O4vG4vPjii/Lwww/LfffdJ//8z//s/igBIB0tuaFlOioiuYXqW14Y+AKyhep2V0BQVggAAAAAgLKRU7CuJV8mTJggP/rRj6Smpsaoq37ooYfK17/+dTnrrLPcHyUApBUf+Cc5VNda13ZC9c3PFGHM8HVZIS3/ks8KCMoKAQAAAABQFnIK1n/4wx/K0qVL5f7773d/RADgRG1jaqi+fY1I4wz7oXpNo8eDhm/LCmlNdUVZIQAAAAAA4HaN9bvuukv6+gZn9AFAsWgIOml+aqjeuc9+qD7jhOFmlShfZlmhfFZAUFYIAAAAAICykVOwrrPVv//978vbb78tvb297o8KAOyorBEJR3IP1WeeKjJtsceDRuDKCulxRVkhAAAAAACQbymYZ599Vnbs2GE0LE3nT3/6Uy53CwAOhdKH6pFKe6G6zlan4SRGKiukx5WWFqKsEAAAAAAAyCdY/8Y
3vpHLrwGA+2LR1FB92jH2QnVTtMfDAaOkywo1r/d44AAAAAAAIDDB+rHHHmv8q6VgNm3aJLFYTA499FA57LDD3B4fAGQRF2l+faA+tjVUt9ZMHylU79glsn+3h2NG4MoK2V0BMfZQgnUAAAAAAMpETsF6e3u7XHPNNfLUU09JQ0ODRKNROXDggBxzzDFy5513Sn19vfsjBYBkfd0iXa0Ds4lzCdXbd4psWC4Sj3k7bgSrrJDdFRCUFQIAAAAAoGzk1Lz0xhtvlObmZnnsscfkpZdekldeeUVWrFghnZ2dcvPNN7s/SgBIJx4d+DfXUH3dAyL9NGDGCGWFnBxXlBUCAAAAAKAs5DRj/Xe/+53cc889MnPmzKFtWgbmq1/9qnz2s591c3wAkF2kIjX83Lc5cfZwxlB9MATVGe8oc5QVAgAAAAAABZ6xXl1dLeFw6q+GQiGjLAwAeCMkMnlhaqje8qb9UL1hisjoCR6OGb4uK6QoKwQAAAAAAAoRrJ9++unyb//2b7J169ahbdrIVEvEnHLKKbncJQA4V1krUl2fe6jeeLDInGUioZzeClEuZYX0uKKsEAAAAAAAsMgpTfriF79ozFo/88wz5bjjjjO+PvjBDxqNTK+//vpc7hIAnLMG4rmE6vM+JlJR5eGAEciyQk6OK8oKAQAAAABQFnKqsT5mzBj56U9/Km+++aZs2rTJCNkPPfTQhJrrAOCZ5PBz/GxnoTrlO+BWWSGznAwAAAAAAChpOQXrvb29cvvtt8vUqVPl4osvNrade+658r73vU/+8R//USorK90eJwCk17pVpG1bYqg+dqb9UF3Ldxyg4WTZc6Os0CEni6y938NBAwAAAACAQJWC0Vrqq1evljlz5gxt+/znPy/PPPOMfPOb33RzfACQWbRXZO+m/EL1jSuGf47yla2skNMVEAAAAAAAoOTlFKw/8cQTcuutt8qiRYuGti1ZskRuvvlmeeyxx9wcHwBkD9bzCdXX/1ykbYfHg4avpQvVnRxXlBUCAAAAAKAs5FQKJh6PS09PT9rtfX19bowLAOxLDj+72+yF6q3vpM5WRvmirBAAAAAAALAppzTpAx/4gFx//fXyyiuvSGdnp/H16quvyg033CDvf//7c7lLAMhN06zUUH3by/ZDdd1eN6EIA4evUFYIAAAAAAAUesb6NddcI9ddd5186lOfklhsYNl7JBKRs88+W6699tpc7hIAnItUiTROTw3Vo302Q/VqkdlnibxJCauyl62skN0VEJQVAgAAAACgbDgO1vfs2SMHHXSQ3HbbbdLe3i5vv/22vPzyy1JdXS3nnnuujBo1qjAjBYB0wXo+ofqCi0RCoSIMHL6VLlTX42rikQPfU1YIAAAAAAA4KQVz4MAB+dznPicnnXSSEaarp556Si688EK5//77ja9ly5ZJc3NzIccLAKmSQ/VRY+2F6mMmF2/MCE5ZIdsf1lBWCAAAAACAcmE7WL/jjjtk+/bt8rOf/Uxmzpxp1FW/8cYbZf78+fLb3/5W/vd//1dOPPFEufXWWws7YgCw6ulIDdWnLnIWqmuAivLmRlmhueeIRKqLMHgAAAAAAODbYP2JJ54w6qovWrRIQqGQPPfcc8Ys9ksuuUQqKyuN22gpGN0OAJ6Ix0R2rk0N1cMV9kP1ba+IdLcWYfAITFkhuysg6icWYeAAAAAAAMDXwXpLS4tMnz48m+/55583GpbqLHXTuHHjpKury/1RAkA6fV0i0f7cQ/UtLwx8AdlCdacrIAAAAAAAQMmzHaxPnDhR3nlnIEiIx+OyevVqWbBggTQ0NAzd5rXXXpPJkwkXAHglnj5Uj/XbC9U3P1OEMSNwZYWcfFhDWSEAAAAAAMqC7WD97LPPlptuusloWPof//EfsnPnTvn4xz8+9PONGzfKbbfdJh/84AcLNVYASFXbmBqqb1/jLFSvafR40PAdygoBAAAAAAAHBhODkV1++eWyf/9+ufbaa40a61deeaUsXbr
U+Nk3v/lNueeee+TUU081bgcAnghFRCbNTw3VO/eJNM6wF6rPOEGkeb33Y4c/ywpVRHJfAUFZIQAAAAAAyobtYL2iokKuueYa4yvZOeecI8uWLZO5c+e6PT4AyKyyRiQcSQ3VlZ1QfeapImMPJVhH9rJCelzpBzWKskIAAAAAAMBJsJ7N7Nmz3bgbAHAolD5Uj1TaC9V1tnpHs8djRuDKCjlZAUFZIQAAAAAAyoLtGusA4EuxaGqoPu0Ye6G6Kdrj4YARuLJCym5ZoZrhht4AAAAAAKB0EawDCLC4SPPrqaG6NdwcKVTv2CWyf7eHY0bgygrZXQExbbHHgwYAAAAAAMVCsA4guPq6Rbpacw/V23eKbFguEo95OGgErqyQ0xUQAAAAAACg5BGsAwiueDS/UH3dAyL9vR4OGIEsK+TkuKKsEAAAAAAAZcGV5qUAUDSRitTwc9/mxKakGUP1nuH62Shzg2WF+roGvqWsEAAAAAAAyIIZ6wACLCQyeWFqqN7ypv1QvWGKyOgJHo4ZvkRZIQAAAAAA4ADBOoDgqqwVqa7PPVRvPFhkzjKREG+FZS9bWSE9rigrBAAAAAAALEiTAASXNRDPJVSf9zGRiioPB4xAlhVyclxRVggAAAAAgLJAjXUAwZccfo6f7SxUp3wH3CorZJaTAYBM7j7Fvfu6bLV79wUAAADAEWasAwi21q2pofrYmfZDdS3fcYCGk2WPskIAAAAAAMABEgAAwRXtFdm7Kb9QfeOK4Z+jfGUrK+R0BQQAAAAAACh5BOsAgh2s5xOqr/+5SNsOjwcNX0sXqjs5rigrBAAAAABAWaDGOoDgSw4/u9vsheqt7wx8T/kOmGWF2rYNf09ZIQAAAAAAkAFpEoBga5qVGqpve9l+qK7b6yYUYeDwFcoKAQAAAACAIAbrl156qVx99dVD32/YsEHOP/98WbBggZx33nnyxhtvFHV8AHwoUiXSOD01VI/22QzVq0XmniMSqS7C4BGYskJ2V0BQVggAAAAAgLLhi2D90UcfldWrVw9939nZaQTtixcvll//+tdy1FFHyWWXXWZsB4CEYD2fUH3BRSL1E4swcPhWulDdyQoIygoBAAAAAFAWil5jvbW1VW655RaZN2/e0LbHHntMqqur5Utf+pKEQiG57rrr5Nlnn5XHH39czj333KKOF4APJYfqo8baC9XHTBbpaC7euEvEsjuec+V+VlxxoviyrJDtD2soKwQAAAAAQLko+tS6b37zm3L22WfLYYcdNrRt3bp1smjRIiNUV/rv0UcfLWvXri3iSAH4Uk9Haqg+dZG9UN0aoKK8UVYIAAAAAAAEZcb6Cy+8IK+88oqsWLFCbrjhhqHtLS0tCUG7ampqkr/85S9Z7y8ejyd8r4F88rZCby/GY/pun/S1SNlqc7vl/tzaJ7f45fXItN1PY3Ftn0Y6ZuIxkZ1rRUKRge2jxkpcQ/VwxcCxpHWzX/+5hNq2SVx/o6JGZMGFIvWTjJ8bj7ntFZHu1oHbDz5+uR9LuWzP47/6xHsp5NizjSaprFBo28sST1gBcb5IpHLgtloW5nWtqW6G6jUSWnDR4H3HPT2WjEcs5/eIYo1lpCM7Fh14f0p3e22Ea3efKmtFahpGHKNbSu51CsKxl++75wjvNU73yS0l9zr5aLufxsI+lfA+pXkPSthmOc+l3HbwPGfrMYt8nvPd886xxz4FdLub/50CQVO0YL2np0e+9rWvyVe/+lWpqalJ+FlXV5dUVVlCDhHj+95eS3O5JPofdVtbW8LtR40aZdyX9ff0sfTrwIED0t/fP7Rdb6u/s3//folGo0PbR48eLZWVldLe3p7wxlFfXy/hcDjhMVVDQ4PEYjHp6OhIeJPR7fp4+rimSCRi3E9fX19C/fiKigqpq6sznqPu7u5A7FPd4P3r9kg4bNyv/o719tm26zb92f7Bx3Zrn4yrvJBItH/4tsZzXxExfma9D72dvibGGKOJY1Sl8DoF4dgzjyV
znCrhddJxDr5O0tcpoWi/bpB47UESGgzV9We9nR0S+eOvJNy+3bifWLhK+uacK1I1Vt+AjG2VO16R2NvPSayvXzo72iUeq826T0qPDevYw5Gw8TwbY7QcesbYMxx7pfA6JYob2U5M/8ga3qvB/54ybU98L5DB/87c3Kdayz7puIdeJ+vroWMx/9DSr67WgZnqsYH71ONKphwtvf1xo856TUVY4usekti+LQM/r6iW2BHnSfWYyRJr3S7R3j7pGjyWsr1Oynjfi8UTxxgOZTzG0m1X5fYeUax9Mt+bzGPJPG8lvH66vb9v4L2pun5oPBo+xHv1MeMSffx6Y4VDpGGy/pL09fYZPw917jGOsYrKColLSPqrx0qssla6539SQjUNWfcp3XlLjw89vhLOueGQMc5Mx14pvE5BOPbsXjNlPMYs2/W6yc19cnJtZBxjGbaXwutUisce+8Q+Zdsn873JvDZKd82kon29Eop2S6iqbmi/dDKLMfFAf67nudpGqaxrMq4BjWvhaK+EDrRIKBQ3ruNjoUqJ1oyVWKTSOM9V1Y/Luk9Oro2yXZeXwutUisce+xT8fWpsbEzYP6CcFC1Y/+53vytHHnmknHTSSSk/0/rqySG6fp8cwFuZb0LJamtrja9k+iaRjr4xpTNmzBhbj2n8QREOpx2LvvGl265vVum26/OgX4HYp8ELLevvmBdfdrbr/avkx8h7n0KWID1lMOm3G2NMs70kXqcg7JPl+DBnwGQ6lszXV2cUm6G68bN4VKrf/I3IgZ0iGj5WVEt4wYVSXW8p/7L1BZHNqyUcCku4skLG1I8RqW8YcZ/MMDNZujEa29McSyXxOiVuNf5QSv86ZX790m13dZ/S3H/asZgD7d0v0rLRCNWNY89yXBlj0hUQ638uofZtA6/r0AqIyUOvazjWJZVJx1KmfTLe98L2j7FM28vuPaJY+5R07JjnrWRhPXhqxogc/gGRqtED5ap2rh34EFDHNW2hyBEfGVgBEQpJpZYV+tMj+vHtwD4ZZYXOlspwRORPK6S6tlJEj6ks+5TpvKVheUTvx+axVxKvUxD2yeY1U8ZjzLLd+jhu7JPTa6NM20vidUrCPrFPJb9PSdfgye9LQ9fleqKrqB8+z7VuFdm7aeiyvGLeOQNlGfU5iMclvH+XyIbfiITHDdygYaqE5yyTcE/b8HlucD8y7ZPTa6NM1+Ul8ToljZ99Yp/8sk9AuSpasP7oo4/Knj175KijjjK+N4P03/72t7J06VLjZ1b6/YQJ2ZvCpVt+kmlJSiG3F+MxC73d8X2k3Wpje9L9ubVPbvDT65Fpu5/G4tb2EY+Z2saBi3czVNeZxtvXiDTOGLjVYE31kLWm+paBUH3gfkIiNY0Dx57l8cv9WHK+Pef/6j0cY5bRJJUVSv6wxqi1vv4XRk1145hJU6s/pMedlhXy8FjKdv9l8x5RrLGk3WrZrmGD0g9rQhEJafioZYWOvTShVn9Ia/Vr+K4z3K3vV0Zj5cFjydJzppBK8nUKwj6l3Wpzu433mnK/ZvLTWNza7qexuLXdT2Nxa7ufxpLT9pG26Xmuc69I27aBD4XV+NkiRywdvr2Whdn0u4FeNkY/m4GeNSE9D+p5TmfBFvk857vnnWOPfQr4dqDcFC1Y/+lPf5qw/OTWW281/r3qqqvk5Zdflv/+7/82lpvof6z676uvviqf+9znijVcAH6kIeik+cPhpxmqd+4bCNbTNSo1QvVnhr+fcYJI83rvxw5/6evSuj1GWaGhBrjW42qkBrh6XOkXYNKwXEN1GisDAEqRzlRv2zb8vYbqY2cOf6+h+roHjDJnGRvBH9jt8aABACiRYH3q1Klpl6PMmDHDaFT67W9/W2666Sa58MIL5cEHHzTqQ33oQx8q0mgB+FJlja4BTQ3VlZ1QfeapImMPJVjHcNutdKH60AoIm8cVkGYFRMJxZSdUNxsrAwDgN1oeb++mgfNXrqH6xhXDPwcAIKDSFyUrMq3
tdPfdd8uaNWvk3HPPlXXr1skPfvCDoeaBADAglD5Uj1TaC9V1tjqQoaxQTh/WaFkhwFwBkWuozgoIAIDfg3VTLqG6ngfbdng8aAAASmjGerJvfOMbCd/Pnz9fHn744aKNB0BAxKKpofq0Y5yF6lFmy5S9bGWFFGWF4NYKCDuhOisgAABBkByqawkzO6G6eR4M+XKeHwAAtnEmAxBgcZHm11ND9ZoG+6F6xy6R/dR3LHvZygrZXQExbbHHg4avZVoB4SRUZwUEAMCvmmalhurbXrYfquv2uglFGDgAAO4hWAcQXH3dIl2tuYfqulR1w/KBesgoc1nKCjldAQG4tQLC+n4GAIBfRKpEGqenhupmw+4RQ/VqkbnniEQGa7QDABBQBOsAgisezS9UN5aqWmpEorxlKivk5LiirBDcaqzMCggAgJ+D9XxCdT0P1k8swsABACjRGusAkJNIRWr4uW+zSEezjVC9Z/gCH2VusKyQNp1UlBVCsRsrW9/DAADwo+RQXXuL2AnV9TzIeQ4AUAKYsQ4gwEIikxemhuotb9oP1RumiIymvmPZo6wQ3EZjZQBAKevpSA3VtbeInVDdGswDABBgBOsAgquyVqS6PvdQXZeqzlkmEuKtsOxlKyukxxVlheAIjZUBACVMJxLsXJsaqpu9ReyE6tteEekenNQAAEBAkSYBCC5rIJ5LqG5dqgpkKivk5LiirBAUKyAAAKVMS+dF+3MP1fU8qF8AAAQcNdYBBF9y+Dl+trNQnfAKbpUVMsNUlDcaKwMASlo8faiuvUXshOrW8yAAAAHGjHUAwda6NTVUHzvTfqiu4dUByi2UPcoKwasVELZCdVZAAAB8rrYxNVTX3iJOQvWaRo8HDQCAu0gAAARXtFdk76b8QvWNK4Z/jvKVrayQ0xUQAI2VAQClLBQRmTQ/NVQ3e4vYCdX1HGg9TwIAEEAE6wCCHaznE6rrUtW2HR4PGr6WLlR3clxRVgiKFRAAgFJWWSMSjuQequt5cNpijwcNAID7+IsNQPAlh5/dbfZCdXOpKuEVFGWF4BYaKwMASloofaiuvUXshOrW8yAAAAFGmgQg2JpmpYbq2162H6rr9jrKLZQ9ygqhEGisDAAoVbFoaqiuvUWchOpRrpsAAMFGsA4guCJVIo3TU0P1aJ/NUL1aZO45IhEaBJa9bGWF7K6AoKwQrFgBAQAoWXGR5tdTQ3VrzfSRQvWOXSL7Oc8BAIKNYB1AsIP1fEJ1XapaP7EIA4dvpQvVnayAoKwQFCsgAAClrK9bpKs191Bdz4MblrMyCwAQeCQAAIIvOVQfNdZeqG5dqgpkKitk+8MaygphEI2VAQClLB7NL1Q3zoOWcyUAAAFFsA4g2Ho6UkP1qYucheoaoKK8UVYIhUBjZQBAqYpUpIbq2lvEVqjeM3z9BABAgPEXG4Dg0uWjO9emhurhCvuh+rZXRLoHl7KifGUrK2R3BQRlhWBFY2UAQMkKiUxemBqqW3uLjBSqN0wRGc15DgAQbATrAIKrr0sk2p97qK5LVfULyBaqO10BAbACAgBQyiprRarrcw/V9Tw4ZxkrswAAgTeYQAFAEMXTh+qxfnuhunWpKqBlhVo25rcCgrJCcKuxcihUhIEDAGCDNRDPJVTX82DXPg8HDABAYfARMYBgq21MDdW3r3EWqtc0ejxo+A5lhVAINFYGAJSy5FBde4vYCdXN86BefwEAEGAE6wCCKxQRmTQ/NVTv3Gc/VNeLf2t9SJSnbGWF7K6AoKwQrGisDAAoZa1bU0N1a2+RkUJ1PQ8e2O3xoAEAcBfBOoDgqqwRCUdyD9V1qeq0xR4PGoErK+R0BQTACggAQCmL9ors3ZRfqL5xxfDPAQAIKIJ1AAEWSh+qRyrtherWpapAprJCTj6soawQFI2VAQClHqznE6rrebBth8eDBgDAfTQvBRBssWhqqD7tGGehepTZMmXPrbJCzes9Hjj8icbKAID0lt3xnCv3s+KKE6XokkN
1LWFmJ1Q3z4PWJqgAAAQQwTqAAIuLNL8+MDvUGqpba6aPFKp37BLZT33HspetrJDdFRBjDy1IsB6NRSUm6Zt7tXS22L6fmooaqa+qd3FkyGkFROOMge9ZAQEACLKmWamhuvYWmXikvVBdt9dNKMLAAQBwD8E6gODq6xbpah0IqHIJ1XWp6oblA/WQUeaylBWyuwKio7kgofqBvgNGKF4ZrhzaHovHpDvaLbe/ervxfVW4SppqmyQ8OPNLf763a6/0xgaWaoclLFPqpsgn5n6CcL3YKyA0WGcFBAAgyCJVIo3TU0N1s7fIiKF6tcjss0TefKwIgwcAwD0E6wCCKx4d+DfXUN1YqmqpEYnylqmskJPjyuWyQjpTXUP1M6afIaMqRxnb9vftlw17N0h/rF8aqxtl4qiJsmT6EqnU8ernTdE+WbV1lXT1d8koGSVVkSo5dtKx8tru16S7v5tgPSiNlQu0AgIAAFeC9XxCdT0PhgYnNQAAEGAE6wCCLVKRGn7u25w4ezhjqN4zfIGPMuffskI6U11DdQ3EO3o7ZFPrJomEIhKJROSwxsNk6cylCaH6ys0rjfB9dOVoI1T/yKyPSEhCRrCOADVWLsAKCAAAXJUcqmtvETuhup4HOc8BAEoA3UIABFhIZPLC1FC95U37oXrDFJHR1Hcse2ZZIeXTskIaqq9rWWfMVFc6Wz1dqL7zwE7jezNUnzCK47soaKwMAChlPR2pobr2FrETqluDeQAAAoxgHUBwVdaKVNfnHqrrUtU5y0QG61KjjGUrK6THVZHLCukM9ORQfd64eY5CdQ3m4fEKiHzKCtFYGQDgVzqRYOfa1FDd7C1iJ1Tf9opI9+CkBgAAAoo0CUBwWQPxXEJ161JVIFNZISfHVQHKCmkjUrOmujVUjwzW8LYTqq9vWS/tve2ujw3BXQEBAEDOtHRetD/3UF3Pg/oFAEDAEawDCL7k8HP8bGehOuEVfFxWqDvanVeo/uquV2XN7jWujwtZ0FgZAFDS4ulDdb1esROqW8+DAAAEGME6gGBr3Zoaqo+daT9U1/DqAOUWyp6PywrF4/G0oXo0FrUVqr+480XXx4Q8VkDYCtVprAwA8LnaxtRQXXuLOAnVaxo9HjQAAO4iWAcQXNFekb2b8gvVN64Y/jnKV7ayQk5XQBRAQ1VDSqi+fs96R6H6mKoxBRsfgrMCAgCAvIUiIpPmp4bqZm8RO6G6ngOt50kAAAKIYB1AsIP1fEJ1XaratsPjQcPX0oXqTo6rApQVioQiMmfsnJRQvbWn1XaovmjCIqmvsszIR9mugAAAIG+VNSKD1yU5hep6Hpy22ONBAwDgPv5iAxB8yeFnd5u9UN1cqkp4BR+XFaqpqMkrVD9+8vEyb/w818eFLGisDAAoaaH0obr2FrETqlvPgwAABNjg2i0ACKimWamh+raXRSYeaS9U1+11lFsoe2ZZIbOmtQ/LCiWH6hXhCluh+tETj5aWzpaCjQtZ0FgZQKHdfYo793PZanfuB+UjFk0N1bW3iJNQPUo5RgBAsDFNE0BwRapEGqenhurRPpuherXI3HNEIjQILHvZygrZXQFRwLJC6UL1BeMX2ArVTb3WfUTZroAAACB/cZHm11NDdWvN9JFC9Y5dIvs5zwEAgo1gHUCwg/V8QnVdqlo/sQgDh2+lC9X1uCpyWaGN+zamhOrWmukjhep7uvbI3q69BRkb0qCxMgCglPV1i3S15h6q63lww3JWZgEAAo9gHUDwJYfqo8baC9WtS1WBTGWFbH9YU5iyQt393dLW25ZzqL67c7c8seUJiQl/vHqGxsoAgFIWj+YXqhvnQVbSAQCCj2AdQLD1dKSG6lMXOQvVNUBFefNxWaHo4B+vuYbqj2x6hDIwxUJjZQBAqYpUpIbq2lvEVqg+eB40e9sAABBQ/MUGILh0+ejOtamherjCfqi+7RWR7sGlrChf2coK2V0BUcCyQulC9a3tWx2F6lVhyz6ieCsg7IbqNFY
GAPhWSGTywtRQ3dpbZKRQvWGKyGjOcwCAYCNYBxBcfV0i0f7cQ3VdqqpfQLZQ3ekKCJeFQiGZ2zQ3JVTf3LbZdqg+cdREaaptKtgYEZwVEAAA5K2yVqS6PvdQXc+Dc5axMgsAEHicyQAEWDx9qB7rtxeqW5eqApnKCjn5sKYAZYVqIjVSV1mXc6g+efRkWTJ9iYT549U7NFYGAJQy6zVFLqG69TwIAECA8Vc2gGCrbUwN1bevcRaq1zR6PGj4jo/LClkD8VxC9aUzl0qlNheD92isDAAoZcmhuvYWcRKq6/UXAAABRrAOILhCEZFJ81ND9c599kN1vfi31odEecpWVsjuCogClxVKDtVnNsx0FKrH+OPVWzRWBgCUstatqaG6tbfISKG6ngcP7PZ40AAAuGswNQA8Eotmn5nQ0eysth+BaHmrrBEJR3IP1XWp6thDRZrXezxwBKqskB5XjTOKWlZoe8d2ae5sTgjVp4+ZbjtU74v2yd6uvQUdI9KsgNAP/3y2AgIAgLzp9cbeTQPnr1xD9Y0rhn8OAEBAEazD21BdZ4VqWYN4dHBjaCAgN0sdvHLPwAw9a5igZTqsAXq0R2T/bpHag0RO+RLhelkLpQ/VNUy0E6rrbHUnH+agPMsK6XGlwXqRygr1xfpkS8cWqR5sZJlLqL5q6yrpjQ38HB6ugKiI0FgZAFB6Bq85cg7V9TzYtsPjQQMiF6y8wLX7emjpQ67dF4DgIliHtzP4NEAfPW6gsVukQmTywsSO8k2HDYQJtU0D32vwOW3x8M87dolsWC6iAVPn3oHwgmC9vOkHNsmh+rRj7IXq1g9rUN7cKitUgNUPZmCea6i+cvNK2dW5y/VxIRsaKwMAykByqK4TpOyE6uZ5kMbqAICAI1iHt3Smuobq1XUD4ac1FNfmNxqU688ydZTf9NTA71eO0oTC+/HDZ+Iiza8PHDfWUN16XI0UquuHNboCAuUtW1khuysgClxWKDlU7+jtsBWq7zyw0/g+TFsVf6yAcFJWiMbKAAC/apqVGqprb5GJR9oL1XV73YQiDBwAAPfwVza8pzPV04Xq1uY36UJ16+yHhikio7kQK3t93SJdrbmH6npc6QoImjoiW1khpysgCmBG/YyUUH1dyzrboXpVpEqazJVAKDwaKwMASplOdGqcnhqqmw27RwzVq0XmnjOwChkAgAAjWIfHQgPlX/IJ1fVCbc4ylg5iuFZ/rqG6cVxRdxojlBVyclwVoKyQhuJT66emhOr9GtbaDNXPnHGm8S8C1FjZWgYNAAA/sV5T5BKq63mwfmIRBg4AgLsoBQNvaaNSa031XEJ1vVDrGgwogEwrIKxNSUc6rvQCH2XOv2WFKsOVeYXqH5n1EQmZM/LhERorAwDKQHKorr1F7ITqeh7kPAcAKAFM+YW3rLPMk0N1bX5jJ1Q3L9Qo3wE3VkBQVggBKSuUHKo3VjfaCtUnjOL4LgoaKwMASllPR2qorr1F7ITq1mAeAIAAI1hHcaQL1a3Nb0YK1fVC7QANJ8ueGysgKCuEkcoK6XFV5LJC+/v2p4Tq88bNcxSqazAPj1dA5FNWiMbKAAC/0okEO9emhupmbxE7ofq2V0S6Byc1AAAQUKRJ8F7r1vxD9Y0rhn+O8pVtBYTdskLmcQW40Vi5AGWFYvGYbNi7ISVUjwzW8LYTqq9vWS/tve2ujw3BXQEBAEDOtHRetD/3UF3Pg/oFAEDAEazDW9Fekb2b8gvV9UKtbYfHA4evUVYIJVxWqDvanVeo/uquV2XN7jWujwtZ0FgZAFDS4ulDdb1esROqW8+DAAAEGM1LA2TZHc+5cj8rrjhRihqsZwrVtcaenVDdvFCjfAfMFRBt24a/p6wQilVW6JCTRdbe7/rQ4vF42lA9GovaCtVf3PmiBEVJnOdMNFYGAJSy2sbUUF17izTOsB+q1zR6PGgAANxFMoniSBeqa/Mbu6G6bq+jIV/Zc2MFBGWF4HZj5QJoqGpICdXX71n
vKFQfUzWmYONDcFZAAACQt1BEZNL81FDd7C1iJ1TXc6D1PAkAQAARrMN7TbPSh+pm85sRQ/VqkbnniESYyVf2sq2AoKwQitFYuQBlhSKhiMwZOyclVG/tabUdqi+asEjqqywz8lFYNFYGAJSyyhqRweuSnEJ1PQ9OW+zxoAEAcB9/scFbkSqRxun5hep6oVY/sQiDh29RVgh+aaxcgLJCNRU1eYXqx08+XuaNn+f6uJAFjZUBACUtlD5U194idkJ163kQAIAAI02C98F6plBdm9/YCdWtF2pAphUQlBVCiZUVSg7VK8IVtkL1oyceXbAxYQQ0VgYAlKpYNDVU194iTkL1KOUYAQDBRrCO4kgXqmvzGyehut4HypsbKyAoKwS3GisXsKxQulB9wfgFjkL1Xus+omxXQAAAkL+4SPPrqaG6tWb6SKF6xy6R/ZznAADBRrAO7/V0pA/VzeY3dkL1ba+IdA8ETChj2VZAUFYIxWisXKCyQhv3bUwJ1a0100cK1fd07ZG9XXsLMjYEcwUEAAA56+sW6WrNPVTX8+CG5azMAgAEHsE6vKUXTzvX5heq64WafgEmygrBF42VC1NWqLu/W9p623IO1Xd37pYntjwhMeGPV8/QWBkAUMri0fxCdeM8yEo6AEDwEazDW31dItH+9KG6Nr+xE6pbL9SATCsgKCuEEikrFB384zXXUP2RTY9QBqZYaKwMAChVkYrUUF17i9gK1XuGr58AAAgw/mKDx+KZQ3VtfuMkVK9p9HLgKNUVEJQVgluNlQtYVihdqL61faujUL0qbNlHFB6NlQEAJSskMnlhaqhu7S0yUqjeMEVkNOc5AECwEazDe7WN6UN1s/mNnVBdL9KsF3IoT9lWQFBWCMVqrOyyUCgkc5vmpoTqm9s22w7VJ46aKE21TQUbI4KzAgIAgLxV1opU1+cequt5cM4yVmYBAAKPMxm8FYqITJqfX6iuF2rTFns8cARuBQRlhVCMxsoFKCtUE6mRusq6nEP1yaMny5LpSyTMH6/eobEyAKCUWa8pcgnVredBAAACjL+y4a3KGpFwJH2ors1v7ITq1gs1INMKCMoKoUTKClkD8VxC9aUzl0qlvr/CezRWBgCUsuRQXXuLOAnV9foLAIAAI1iHx0KZQ3VtfuMkVI8OXqChfLmxAoKyQnCrsXKBywolh+ozG2Y6CtVj/PHqLRorAwBKWevW1FDd2ltkpFBdz4MHdns8aAAA3EWwDu/FoulDdWu4OVKo3rFLZD8XYmUv2woIygqhWI2VC2B7x/aUUH36mOm2Q/W+aJ/s7dpb0DEiGCsgAADIm15v7N2UX6i+ccXwzwEACCiCdXgsLtL8en6hul6obVjO0kFkXwFBWSEUo7FyAcoK9cX6ZEvHlrxC9VVbV0lvbODn8ACNlQEApWzwmiPnUF3Pg207PB40AADuI1iHt/q6Rbpa8wvVjQs1AiKMsAKCskIokbJCZmCea6i+cvNK2dW5y/VxIRsaKwMAykByqK4lzOyE6uZ5kMbqAICA40wGb8WjmUN1bX5jK1TvGQ4kUOZcWAFBWSG41Vi5wGWFkkP1jt4OW6H6zgM7je/DnPK9RWNlAEApa5qVGqprbxG7obpur5tQhIEDAOAe/sqG9yIV6UN1a/ObkUL1hikio7kQK3turICgrBDcbqxcADPqZ6SE6uta1tkO1asiVdJU21TQMSIYKyAAAMhbpEqkcXpqqG72FhkxVK8WmXuOSISJUgCAYCNYh8dCIpMX5heq64XanGUsHUT2FRCUFUIxGisXoKyQhuJT66emhOr9GtbaDNXPnHGm8S88QmNlAEAps15T5BKq63mwfmIRBg4AgLsGp1IBHqmsFamuzy9U1wu1rsGAAsi0AqKjefh7ygrBblkhbTrps7JCleHKvEL1j8z6iITMGfkITmNl63sYAAB+lByqa28RO6G6ngc5zwEASgBTfuEt6yzz5FBdm9/YCdXNCzXKd8CNFRCUFUJAygo
lh+qN1Y22QvUJozi+i4LGygCAUtbTkRqqa28RO6G6NZgHACDACNZRHOlCdWvzm5FCdb1QO0DDybLnxgoIygrBtcbKhSsrtL9vf0qoPm/cPEehugbz8AqNlQEAJUwnEuxcmxqqm71F7ITq214R6R6c1AAAQECRJsF7rVvzD9U3rhj+OcpXthUQdssKmccV4EZj5QKUFYrFY7Jh74aUUD0yWMPbTqi+vmW9tPe2uz42BHcFBAAAOdPSedH+3EN1PQ/qFwAAAUewDm9Fe0X2bsovVNcLtbYdHg8cvkZZIZRwWaHuaHdeofqru16VNbvXuD4uZEFjZQBASYunD9X1esVOqG49DwIAEGAE6/A+WM8UqmuNPTuhunmhRvkOuLUCgrJC8HFZoXg8njZUj8aitkL1F3e+6PqYkMcKCFuhOo2VAQA+V9uYGqprbxEnoXpNo8eDBgDAXSSTKI50obo2v7Ebquv2OhrylT03VkBQVghuN1YugIaqhpRQff2e9Y5C9TFVYwo2PgRnBQQAAHkLRUQmzU8N1c3eInZCdT0HWs+TAAAEEME6vNc0K32obja/GTFUrxaZe45IhJl8ZS/bCgjKCqEYjZULUFYoEorInLFzUkL11p5W26H6ogmLpL7KMiMfZbsCAgCAvFXWiAxel+QUqut5cNpijwcNAID7+IsN3opUiTROzy9U1wu1+olFGDx8i7JCKOGyQjUVNXmF6sdPPl7mjZ/n+riQBY2VAQAlLZQ+VNfeInZCdet5EACAACNNgvfBeqZQXZvf2AnVrRdqQKYVEJQVQomVFUoO1SvCFbZC9aMnHl2wMWEENFYGAJSqWDQ1VNfeIk5C9SjlGAEAwUawjuJIF6pr8xsnobreB8qbGysgKCsEtxorF7CsULpQfcH4BY5C9V7rPqJsV0AAAJC/uEjz66mhurVm+kihescukf2c5wAAwUawDu/1dKQP1c3mN3ZC9W2viHQPBEwoY9lWQFBWCMVorFygskIb921MCdWtNdNHCtX3dO2RvV17CzI2BHMFBAAAOevrFulqzT1U1/PghuWszAIABN5gkgl4RC+edq4d6CSfa6iuF2r6BbhRVqijuXjjRok1Vi5MWaHu/m5p622T6kh1TqH67s7d8sSWJyQm/PHqGRorAwBKWTyaX6hunAdZSQfLNXdflzvN463HIgB4gGAd3tITZrRfpCKSGqpr8xs7obr1Qg3QFRAtGykrhOKXFZp9lsibj7k+tOjgH6+5huqPbHqEMjDFQmNlAECpilSkhuraW8Q6aWWkht16/YTyptdGv/9Pkf3Nw9c+OlHFWqpTb2NdrV7TmHjcaa1+LStUe5DIKV8iXAfgKYJ1eCw+8E+6UF2b3zTOsB+q6wkV5c2NFRCUFYJbjZVDoYINL12ovrV9q7R0ttgO1avCln1E8VZATDxy4HsaKwMAAiskMnlhaqiuvUWmHGUvVG+YMlxOBuU98U5D9YpakdrGgf5X1lKd+rea5gC1TQPf6zE1bXFirX4tK6RBfOfegfsjWAfgIaZCwXt6wkwXqpvNb+yE6npC5YQJcwWEoqwQ/NJY2WWhUEjmNs1NCdU3t222HapPHDVRmsw/SFB4NFYGAJQyLblRXZ8aqptGCtX1PDhnGSuzkJgRHHupyJQFIvWTBr72vSXSvF6kum7g64ilA1/mz+NxkU1PDVx3VY5iBQSAouBMBm/pzOJJ8/ML1fVCzfopNcpYlhUQlBVCMRorF6CsUE2kRuoq63IO1SePnixLpi+RMH+8eofGygCAUma9psglVLeeBwE9nnRCwUgZwEgrIEaz0g+A9/grG96qrBEJR9KH6tr8xk6obj2hAplWQDgJ1SkrBLOsUD6heoHKClkD8VxC9aUzl0qlvr8iWI2VAQDwu+RQXXuLOAnV9foL0NJ31gkFTkN1VkAAKCLeeeCxUOZQXZvfOAnVtUkJypsbKyAoK4SRygrZXQFR4LJCyaH6zIaZjkL1GH+8+mMFBI2VAQC
loHVraqhu7S0yUqiu58EDuz0eNHzJWvoul1CdFRAAyjVY37Vrl1x55ZVy7LHHykknnSQ333yz9PQMvEG+88478ulPf1oWLlwoZ511ljz33HPFHCrcFIumD9Wt4eZIJ1RtUqKdv1Hesq2AoKwQ3GysXOSyQts7tqeE6tPHTLcdqvdF+2Rv196CjhHBWAEBAEDe9Hpj76b8QvWNK4Z/DrgRqjOJBEA5BevxeNwI1bu6uuT++++X//zP/5Snn35abr/9duNn//AP/yDjxo2TX/3qV3L22WfLF77wBdmxY0exhgvXxEWaX88vVNcTqnb+5sSJbCsgKCuEYjRWLkBZob5Yn2zp2JJXqL5q6yrpjQ38HB6gsTIAoJQNXnPkHKrrebCNv+0hiRMK8gnVWQEBoEgG/8rz3ubNm2Xt2rXyhz/8wQjQlQbt3/zmN+Xkk082Zqw/+OCDMmrUKJk1a5a88MILRsh+xRVXFGvIcENft0hX60CQkGuobpxQCYgwwgoIygqhGGWFmte7PjQzMM81VF+5eaXs6tzl+riQDY2VAQBlIDlU1xJmdkJ18zxITWyYx41e/1TX5R6qswICQJEU7Uw2fvx4+eEPfzgUqpv2798v69atk7lz5xqhumnRokVGEI+Ai0cH/k0XqmvzGyefUmsggTLnwgoIygrBrcbKBS4rlByqd/R22ArVdx7YaXwfpq2Kt2isDAAoZU2zUkN17S1iN1TX7dq0ErCWvsslVGcFBIBynLE+ZswYo666KRaLyc9+9jM5/vjjpaWlRSZMSDzJNjU1SXNzc9b71BIyVqFQKGVbobcX9jHjg6UvUm/vZLveb0H3aaSRRCoSwk9ju9lRfsrCgRvPPFVCM04Yvv8OPaE+KNLfLSEJSVyDCL14058P3ibTeNzi9bHkdLufxuLaPo10LCWtgAhNO0biZqiu97V1IKQyjhn9Db1Qm3788DHT0Sxxs6wQx1Je2/N9Xxq6l0KOPetoEssKhTr3Db5fDX5YUz/JOD6M+97yfEqobrxf6YW//lYBjqUZ9TNSQvV1Letk9kGzjfufUjdFPnzoh6UiXGF83x/rN0L1HfsH/sioCldJU22T8TN/v3c4Pc/58VhKXQERivVL3PywpnG6SEWNyIILJTRm8vD9J79fGSsgXh86nrKN0S2cn4qwT/m+e47wXuN0n9xScq+Tj7YH4VhybZ8CvD37e7Yb10wF3qeRzriRqoHzmbndDNWN3iJxkYaDJTTvYxLX6yi9f50E8LqGn+8MnOf02n32h0TefKyo5zk/HTNubffTWGxtH/qZ/q12StoMINTfM3Bt1KCh+vkD1+d6zET7JP76Q8ZxZfy+roDQ7VnOQ27K57kv+vPu8na3n1sgSIoWrCf71re+JRs2bJBf/vKXcu+990pVVWJXZ/2+tzdz+Q/9j7qtrS3h9jrjXWu4W3+vpqbG+Dpw4ID09w/WP9XV2qNGGb+jM+aj0cFZ1SIyevRoqayslPb29oQ3jvr6egmHwwmPqRoaGowPCTo6OhLeZHS7Pp4+rikSiRj309fXJ52dnUPbKyoqpK6uzmjk2t3dPbQ9FotLOBwy/o1b6ouHQmFjezQ2GA4O0vHpY0ejetvh7ToOt/epbvA50+2RcNi4X/0d6+0H54JKfNICkeoxwxfw77411FE+2h+V6CEnS2TqscbBqc9NrG2HRN74uXFCDUfCEjpouvROPV5Cr90vXR3tEo/VZnydzIxG79cqUhExfmZ9rfV2+poYY48mjl3ZfZ1K8djzcp/MY8kcp0p4nXScg6+ThqDGhZOGV1MXS2gwVNef9f31WYm8/ezAsReJSHTGydI/8WiRwQbJkc7dUvnHX0qsr0tiff3SOXgsZdsnpceGdezGMWn8dxZN+AvEGHuGY68UXqdE+p6k71HWfR143jNvT3yPkMH/ztzcp1rLPg2/Hya9HjoW88JYa2Jve0VCXe8OherxqYuN96veweOmZtdrEt/09NB7hL5fyaRFout
ndH+iXQeG3peyvU4yePt4LOk9W9/jLceY3kZD8an1U4duq6H663teN8JzNb56vCyZskRi/THp6e+RUCQkj739mLxj/JEhUhWpkhOnnCjPNT9n3J/19fPbsae7rYdC8nkrHI5IKBRP//qlOcaU2/tkvjeZx5Lx+qU55+r2kAYG5h93ulLLsgKiXyokOudcqaob+LBGHzf0zkvG+5UxzoqIxA89RXrrpkl46xrjeJL4qKyvU7rzlr436TGTcC4Oh4xxZjr2OD95s0+2rpnM1ynDMWZu39/W5uo+Obk2Mo6xDNtL4XUKyrFnHk/mtVG6Y8nOdj2W/LJPpfg6WffJybVRJBzJ+Lef2/tkHkvmtVG6c64Mbg/rbGEzEO9pt4TqIv2jJ0tszjlSXVElsWhU+roPSOSPv5JQ27aBY696lESPPF/6ozEJ9/YZ57nKioasr1Om62+n1+Xlfux5uU91/7N0xGsmYz+jfRLWPkA6UaXlTYnrl9528MOa2LgjBq6NGg6W3tlni/THB2avR3ul5s+PSLx168BxIBGJVx0kvZpNjMn8t7vTa6Nsx571uQ/q6+TWsdfYyCpLlK8Kv4Tq9913n9HA9PDDD5fq6mppbW1NrDHb22u8CWRivgklq62tNb6S6ZtEOvrGlGmGvZ3HNP6gCIfTjkXf+NJt1zerdNv1edAvk15ADf9rxtTD9AI5nUgknDIO1/dp8ELL+jvmxdcQfZOuHCWhmjFDIZpR/mUwVDfGevgZEpk+vPSrsnuPyMZfG1GEaBhufEr9Manq3CtSVSmV9WNE6ofHlbJPIUuQnrKz6bcbY0+z3e7rVIrHnqf7ZDluzJkxKceS+YGHfkUqJHTwsQnlX/TDmqr9zQPHjJp5qkSmHz/8X43OfvjjL42LsrD+cVIzWsYkHUuZ9kkvptJJN0Zje5pjqSRep8StRuiZ/nXK/Pql2+7qPqW5/7RjMf9n1/qBppP6rYbq044Z+rDGGJMxo3j18HuEcVwNv1+FO1sk3PtuyvtSpn3S4yBddRbrMaYX3BWhwVnP4dBAqL73demP9xtj1vIvOlPdWv7l0bceNcq/6L5WR6pl2axlxuyw0K7B94g6/x575qkh+bw1+NM0r1/mY8/1fUp6DOP1S8PYrqG67kxSqK4f1lQs+oRU1A+Uf9FjqXrXqyLb/jD0fhWaeZqxsqa6o9lynhuTdZ8ynbf0mNFQJu0Y0wyf85NH+2TnmmmkY2yQ9XHc2Cen10aZtpfE6xSUfbJeN2U4luxstz5O0fepFF+nPK6NMv3t5/o+JV2DJ48l4brcvA5PCtW1t0jF0R8fmNGuY4/3S/WbvxHRsnT6XmGs2LpIIvWTJGI9zw3uR6Z9ynT97fS6vNyPPU/3yfIaZDqfDbxOMZFwtchBMwbGr/+jx9X2V4xJVHqOCenqiHnnS/XgcWWsgNjwGxHzw5qa0SKzlhkrICoHx5Bpn5xeG2U79tLdf+BeJ1f/JgTKU9GD9a9//evywAMPGOH6Bz7wAWPbxIkT5a9//WvC7fbs2ZNSHiZZuuUnmZakFHJ74e7b/D7TMht72837Leg+ZRqJtUGNJVQPmc1vZrxv+OftOyVklH/RmaKhhHpqxuOaUxstYyjkEqRiHEtOt/tpLG5tz35Ua7K4MDFUHyordFRCnb5QQp0+87gSCTVMHSgnw7GU5/b83pe8GWOW0aRprGyG6oatLxqh+sB9hNLWfwzpBb7OKCvQsWSWfzFnqjdWN6bUVDdDdVVdUS0fmfURmTBqgrR0tgyNxd/vHbmc53x2LGVorBwaaqw8ZfjGW16QkHFchVLfr4yp+70Jx1Mh35ey3T/npwLvU9qtNrfbeK9xut0NJfk6+Wh7UI4lXid3ro38dJ5L2NbTIdKy0QjVQ9aG3WYfrP5eCa3/hRF+Gr9p6S0ydJ7rbi/6ec5Px4xb2/00FmN72q2W7WZgrrrbJGT9sMYI1T8moYSa6uZxJRIa/LBm4Bjy7ljK9BiBfp18dB0
BBElRO5l997vflQcffFBuu+02+fCHPzy0fcGCBfLHP/4xYTnLmjVrjO0oEUkz1VM6yttpUnKAhpNlr7JWpLo+83Flp/nNnGUDs0tR3lxprJy5XFm+9vftTwnV542bl7FRqZZ/MUN1azAPr9BYGQBQwnQiwc61CTPVExp2pzQqTdOwe9sriU0rgYRa/YPHVdYGuGmOKwDwWNHSpE2bNsn3vvc9+exnPyuLFi0yGpaaX8cee6xMnjxZrrnmGvnLX/4iP/jBD+T111+Xj370o8UaLtzUujX/UH3jiuGfo3xlWAGRU0d5IKmxck7HlTlLy0WxeEw27N2QEqqby1jthOrrW9ZLe2+762NDBuYKiFxDdT2uzMbKAAD4jZbO0940uYbqeh7UL8C6AiI5VDdWQDgI1TWYB4ByKQXz1FNPGY0Rvv/97xtfVm+++aYRul933XVy7rnnyowZM+TOO++UKVMsy6cRTLqsfe+m4fApl1BdT6htO4oweARqBYSTUJ3wCmnKCjkO1RumDIepLuqOdhuhutahzCVUf3XXq7Jm9xrXx4UcV0DYCdULvAICAID8xNOH6joJwE6obj0PAuYKiNBg7XNWQAAIkKIF65deeqnxlYmG6T/72c88HRM8CtZNyaG6fsJsJ1Q3T6iU74C5AmKwxp6BskIoVlmhQ04WWXu/60OLaz+JNDPVo7GorVD9xZ0vuj4m5LECQpu1FXEFBAAArqhtTA3VtbdI4wz7oXpNo8eDhq9XQGiDW1ZAAAiYojcvRZlKF6rr0q+JR9oL1XV7XfZmtshs2R3PuXI/K644UQK/AoKyQhiprJDdFRBdg/W0C6ChqiElVF+/Z71MrZtqO1QfUzWmYOODgxUQSY2VvV4BAQBA3nRm8aT5qaG69hbRYN1OqK7nwOb13o8dPsQKCADBxZRfeK9pVvpQfajz90iherXI3HNEIszkK3vZVkBQVgjFaKxcgLJCkVBE5oydkxKqt/a02g7VF01YJPVVlhn5KCwaKwMASllljcjgdUlCqG43/NTz4LTFHg8agVwB4SRUZwUEgCLgLzZ4K1Il0jg9v1BdT6j1E4swePgWZYXgl8bKBSgrVFNRk1eofvzk42Xe+HmujwtZ0FgZAFDSQulDde0tYidUt54HgWwrIJTdFRDWlYIA4BFKwcD7YD1TqK5Lv+yE6npCtdaoRXnLtAKCskIosbJCyaF6RbjCVqh+9MSjpaWzpWDjQhY0Vka+YtHsx4Hd6yFdRUHgAMDt96fkUF17izgJ1aOUY4RLKyDGHkppIQBFQbCO4kgXquvSLzuhuvU+UN7cWAEx+yyRNx8rwuBRco2VC1hWKF2ovmD8AluhuqnXuo8oPBorw43QShu6VY0e3qb/HVv/W37u9sTAXEOq/buHw3g9z42eMHBcHXsp4ToAl8RFml8feI+yhurW95iRQvWOXQPvV4AbKyCYeAegSAjW4b2eDpGWjamhupPO39teEemmqVvZy7YCwm5ZodDghRzgRmPlApUV2rhvo3RHuxNCdWvN9JFC9T1de2Rv196CjA3BXAGBANBwXEP1wz8w8K9+WKPHlXXF1on/lBhSbVguUnPQcANcrdXf0y7ypxUDARjBOgA39HUPNNjW81wuobqeB/X9ipVZMLECAkBAEazDW3rxtHPtQB21XEN1PaHqF2CirBB80Vi5MGWFuvu7pa23Taoj1TmF6rs7d8sTW56QmPDHq2dorAw3aajeuXdgBUTyhzX1k4aPq01PDXzgbKzmshxXep4jvALgpnh04N9cQ3XjPMhKOphYAQEguOjYB2/pyTLanz5U16VfdkJ16wkV0BUQlBWCH8oKzT1HJDIYerkoOvjHa66h+iObHqEMTLHQWBkl3FgZQJmLVKSGn9pbxFaoPvh+ZX5YiPJmroBQrIAAEDD8xQaPxTOH6rr0y0moXtPo5cDh5xUQlBWCHxor108s2PDShepb27c6CtWrwpZ9RPFWQNgN1WmsDGtZIRNlhQD
4Qkhk8sLUUN36IeBIobqWq9IeEAArIAAEGME6vFfbmD5Ud9L5W0+m1AlFthUQlBVCsRoruywUCsncprkpofrmts22Q/WJoyZKU21TwcaI4KyAQMD4vLEygDJVWStSXZ97qK7vV9oDgpVZMLECAkBAcSaDt7S2+qT5+YXqekKdttjjgSNwKyAoKwS3ygo5+bCmAGWFaiI1UldZl3OoPnn0ZFkyfYmE+eM1WI2VC7gCAiXUWJmyQgCKwfqekkuobn2/AlgBASDAuMqGtyprRMKR9KG6Lv2yE6pbT6hAphUQlBVCiZQVsgbiuYTqS2culUp9f0WwGisDPm+sDAAp4ad+COgkVKcmNhQrIAAEGO888Fgoc6iuS7+chOpR6oWWPTdWQFBWCG41Vi5wWaHkUH1mw0xHoXqMP169RWNluIGyQgD8isbKcAsrIAAE2GBqAHgoFk0fqjtpUtKxS2Q/F2JlL9sKCLtlhcYeKtK83uOBI3CNlRtnFLWs0PaO7dLc2ZwQqk8fM912qN4X7ZO9XXsLOkakWQGhH/75bAUEyrCxcmhwUgMAuN1Y2axpTWNluIEVEAACiBnr8FhcpPn1/EJ1PaFuWM6JE9lXQFBWCMVorFyAskJ9sT7Z0rElr1B91dZV0huzNEFEYdFYGWXQWBlAGcvWWNlOqE5jZSRjBQSAgCJYh7f6ukW6WvML1Y0TKgERRlgBQVkhlEhZITMwzzVUX7l5pezq3OX6uJANjZVR+o2VASBtY2U7oTqNlZFuBYSJFRAAAoQzGbwVj2YO1XXpl61QffCEaS49RBlzYQUEZYXgVmPlaYsLOsTkUL2jt8NWqL7zwE7j+zCnfG/RWBkl3lgZQJnL1FjZbqhOY2WYWAEBIMD4Kxvei1SkD9WdNClpmCIymguxsufGCgjKCsHtxsoFMKN+Rkqovq5lne1QvSpSJU21TQUdI4KxAgIBE4DGygDKEI2VUQisgAAQQLzzwGMhkckL8wvV9YQ6ZxknTmRfAUFZIRSjsXIBygppKD61fmpKqN6voZrNUP3MGWca/yJAjZULvAICJdJYmbJCAPzWWNlOqK7vV/UTizBw+BYrIAAEFMkkvFVZK1Jdn1+obj2hAplWQFBWCCVSVqgyXJlXqP6RWR+RcbXjXB8XsqGxMkq/sTIApG2sbCdUp7EyrFgBASDACNbhLess8+RQXZd+OQnVKd8BN1ZAUFYIASkrlByqN1Y32grVJ4zi+C4KGivDDZQVAhC0xspOQnUaK0OxAgJAgBGsozjShepOm5QcoOFk2XNjBQRlheBaY+XClRXa37c/JVSfN26eo1Bdg3l4xb8rIBAwAWisDKAM0VgZhcAKCAABRJoE77VuzT9U37hi+OcoX9lWQFBWCMVorFyAskKxeEw27N2QEqpHBsM2O6H6+pb10t7b7vrYENwVEAgKfzdWBlCmsjVWthN+0lgZyVgBASCgCNbhrWivyN5N+YXqekJt2+HxwOFrlBVCCZcV6o525xWqv7rrVVmze43r40IWNFZGGTRWBlDOsjRWthOq01gZVqyAABBgBOvwPljPFKrrJ8x2QnXzhEr5Dri1AoKyQvBxWaF4PJ42VI/GorZC9Rd3vuj6mGADjZXhCsoKAQhYY2UnoTqNlaFYAQEgwEgmURzpQnVd+mU3VNftdTTkK3turICgrBDcbqxcAA1VDSmh+vo96x2F6mOqxhRsfAjOCggEDGWFAPgRjZXhKlZAAAgugnV4r2lW+lDdSefvueeIRJjJV/ayrYCgrBCK0Vi5AOFVJBSROWPnpITqrT2ttkP1RRMWSX2VZUY+ynYFBALG542VAZSpbI2V7YSfNFZGMlZAAAgo/mKDtyJVIo3T8wvV9YRaP7EIg4dvUVYIJVxWqKaiJq9Q/fjJx8u88fNcHxeyoLEyyqCxMoBylqWxsp1QncbKsGIFBIAAI02C98F6plBdl37ZCdWtJ1Qg0woIygqhxMoKJYfqFeEKW6H60ROPLtiYMAI
aKyNvlBUCELDGyk5CdRorQ7ECAkCAEayjONKF6rr0y0morveB8ubGCgjKCsGtxsoFLCuULlRfMH6Bo1C917qPKNsVEAgYygoB8CUaK8NNrIAAEFxcZcN7PR3pQ3Unnb+3vSLSPdjMC+Ur2woIygqhGI2VCxRebdy3MSVUt9ZMHylU39O1R/Z27S3I2BDMFRAICJ83VgZQpmisDLexAgJAQBGsw1t68bRzbX6hup5Q9QswUVYIvmisXJiyQt393dLW25ZzqL67c7c8seUJiQl/vHqGxsoog8bKAMpYtsbKdkJ1GisjASsgAAQXwTq81dclEu1PH6rr0i87obr1hApkWgFBWSGUSFmh6OAfr7mG6o9seoQyMMVCY2W4gbJCAILUWNlWqE5jZViwAgJAgPEXGzwWzxyq69IvJ6F6TaOXA0eproCgrBDcaqxcwLJC6UL1re1bHYXqVWHKQXiKxspwA2WFAPgSjZXhIlZAAAgwgnV4r7YxfajupPO3nkytJ1yUp2wrICgrhGI1VnZZKBSSuU1zU0L1zW2bbYfqE0dNlKbapoKNEcFZAYGA8XljZQBlisbKcBsrIAAEFGcyeCsUEZk0P79QXU+o0xZ7PHAEbgUEZYVQjMbKBSgrVBOpkbrKupxD9cmjJ8uS6UskzB+v3qGxMsqksTKAMpWtsbKdUJ3GykjACggAwTWYFgAeqawRCUfSh+q69MtOqK4n1I5mjweOwK2AaJwx8D1lheCkrJB++OezskLWQDyXUH3pzKXS2kO5o8A1VuY8B583VkYAxaKZaxA7ec/R2cqsHEWmxspOQvUC1MSOxqIZG7a3dLbYvp+aipqE1YLw+QqIQ04WWXu/h4MGgAEE6/BYKHOorku/7ITqpij1QstethUQGqzbLSvUvN77scOfZYUqIrmvgChwWaHkUH1mw0xboXqlvr/qbtDQyfsVEC0baayM4pcVmn2WyJuPFWHw8F2oru9LOsFFjyvryho9Pzz178Mhpx43OvPT/GBXt2sDXDPEGj1O5OQvEq6XO22s3LbNV42VNVTvifZIbUXt0Lbu/u6hRvDfefU7Rmm8Ksvx39HbIe297UPfj6kaYwTq2t/mojkXEa4HZQVE12CuAAAeI1hHcS7s04XqTpqUdOwS2e/uhRhKbAWE3bJCYw8lWMfIjZWdrIAogO0d26W5szkhVJ8+ZrrtUL0v2id7u/YWdIwIxgoIlGFj5dDgpAaUN31f0mumg48bCEBNGrbr+1XNQcPlFLT2dXID3N5OES1LpudFfS/TD6QJ1suX2VjZrGntk8bKOlNdQ/WTp50s1ZFq2bhvo7T1DnxIrUH5x4/4uIyrHTd0+/Ut62XN7jXSWD2wenXRhEUyb/w82de9T57a+pQRyhOsl/cKCAAYCcE6PBYXaX594GI8n87fG5Zz4kT2FRCUFYKbjZXtroAoQFmhvlifbOnYYvyBmGuovmrrKumNWZogongrIGisDK8bK3Oeg/WDGg2qzOtuPa50ZY1+CFhdl3kFRHf7wM/1uJp1BisgkL2xsp1QvcCNlfWa6e32t6U72m38fw3VF4xfIEc0HTF0m1d3vSob390ooytHpy2vZ15XoXxXQACAHQTr8FZft0hX68CFea6hunFC5UIHI6yAoKwQvG6sXKCyQtY/7HIJ1VduXim7One5Pi5kQ2NleFBWqMiNlVGmjZVZAYGRGivbCdUL3FhZZ6prqK7MUN0681xD9Rd3vjj0fXKovqdrDyv9vOTTFRAAYEdhzmRAJoP17dKG6rr0y1aoPnjCNE+8KGODKyAoKwQ/NFaetrigQ0wO1bUmqJ1QfeeBncb3YU75/lgB4SRUp7EyzLJC+YTqlBWCm42VgZEaK9sN1QvUWFnLt1jLvzgN1XXSwhNbnsjYABXltwICALLhr2x4L1KRPlR30qRE6z9qUyWUN3MFhKKsEPzSWLkAZtTPSAnV17Wssx2qa5MubdaFgK2AoH4xzLJCyqeNlRHAFRC5lhU
ysQICbjRWnnuOyGCZOzeZjUpzDdWtkxbgMZ+ugACAbHjngcdCIpMX5heq6wlVmypx4kS2FRCUFUIxGisXoKyQhuJT66emhOr9GqrZDNXPnHGm8S8C1Fi5wCsgUCKNlSkrBCdYAQE/lRWqn1iw4aUL1be2b3UUqleFuW7ylE9XQADASEgm4a3KWpHq+vxCdesJFci0AoKyQiiRskKV4cq8QvWPzPqIjKsd5/q4UODGyoCdxsqKskJwYwUEjZVRImWFQqGQzG2amxKqb27bbDtUnzhqIiv9vOTjFRAAMBKCdXjLOss8OVTXpV9OQnXKd8CNFRCUFUJAygolh+qN1Y22QvUJozi+i4LGynADZYXgKhoro/TLCtVEaqSusi7nUF0nLSyZvkTCrI72js9XQABANoNXU4DH0oXqTpuUHHB3Zmg0Fs3YpKals8X2/dRU1CTMkIDPV0AccrLI2vs9HDQC2Vi5o7moZYX29+2XTa2bEkL1eePmOQrVNZiHxysgdHaoz1ZAoAwbK489VKR5vccDR+BWQDTOGPieFRBwUlZIP/zzWVkhayCeS6iukxZaeyh3FLgVENZrdQDwEME6vNe6VaRtW36h+sYVwz93KVTvifZIbUVtQkd5s/nNd179jrEc0FqjWEOq9t72oe/HVI0xAnWt6XfRnIsI14u9AsJuWaGuwYACyNZYecpRRSsrFIvHZMPeDRIZ/OPVDNUjg2GbnVB9fcv6hPcreLQCQo8Hn66AQBk1ViZsgJ0VEBqs210BwQc1MMsKVUR8W1YoOVSf2TDTVqhuTlrQ6y94vAKiZaPvVkAAwEgI1uEtvXDZu2k4fMolVNcTatsOV4elM9U1VD952slSHamWjfs2SlvvwIlZg/KPH/HxhBrFGlKt2b3GCLjUogmLZN74ebKve588tfUpI5QnWPcQZYXgl7JCZjkZF3VHu42Z6pFIJKdQ/dVdrxrvV/AQjZVRBo2VUaaNlVkBgQCUFdresV2aO5sTQvXpY6bbDtX1+mpv196CjhHBWAEBACMhWIe3Bi9e0obq+gmznVDdPKEWoO6dhupvt79tBFn6/82O8kc0HZEQUm18d6OMrhyddkmheYGGAK2AcLmsEALIx2WF4vGBP16TQ3VdaWMnVH9x54uujwl5rICwVVaIxsowUVYIPmuszAoI+LysUF+sT7Z0bDH+lss1VF+1dZX0xvibzjMBWAEBAJnQkQPFkS5U13pqdkN13V7nfkM+nalu1tQzQ3XrzPPkkCo5VN/TtYfZDcVYAeGjskIIKDcbKxdAQ1VDSqi+fs96R6G6lquCV2isjPJprIyAobEySryxsnWSUy6huk5a2NW5y/VxIbgrIAAgG4J1eK9pVvpQ3Unn77nniAzOQnCLlm+xln9xGqrrhdoTW57I2AAVHq+AKFJZIZR5Y+UChFdaW33O2Dkpobr5IaCdUF3LVVGeKmArIOYsK8jKLJRYY2XKCiGXFRD5lBViBQTcKis0bXFBh5gcqmt/LDuhujlpIUxU4o8VEE5CdRorAygCzhbwljb/bJyeX6iuJ9T6ia4PzWxUmmuobr1Qg8d8WFYIAS0rlE+oXqCyQjUVNXmF6vp+pT0gELDGygVcAYESaqxsoqwQ7GAFBPxUVqiAZtTPSAnV17Wssx2q6/VVU21TQceIYKyAAICRkCbB+2A9U6iuS7/shOrWE6rL0oXq2lHeSaheFSYM8cUKiCKXFULABKCsUHKoru9XdkJ16/sVPEZjZeSNskJwEY2VUQZlhTQUn1o/NSVU10bwdkP1M2ecafwLjwRgBQQAZEKwjuJIF6rr0i8nobreh4tCoZDMbZqbEqpvbttsO1SfOGoisxuCtgKiAGWFUKaNlQtYVihdqK4fAjoJ1VlR4zGfroBAwFBWCF6tgLAVqrMCAv4vK1QZrswrVNdJC+Nqx7k+LgR3BQQAZMNVNrzX05E+VHfS+XvbKyLdg0tZXVITqZG6yrqcQ3W9UFsyfYmE+ePVHys
gilhWCGXcWLlA//3TWDlgArACAgHh88bKCBpWQKB8ygolh+qN1Y22QnXrpAV4yKcrIABgJCSA8JZePO1cm1+oridU/XKZNRDPJVS3XqjBYz4sK4RybKxcmLJCNFYOIBorowwaKyOAWAGBMikrtL9vf0qoPm/cPEehugbz8Ip/V0AAwEgG00zAI31dItF+kYpIaqiuFz52QnXrCbUAkkN17SjvJFSP8cer9ysgWjb6qqyQWbojU5DZ0tniqHGlNUSFj8sKzT5L5M3HXB8ajZUDjMbKcKusUNu24e8pK4RiNlbuGgy+gExlhTqai1pWSP8W27B3g0S0IaYlVDcbwdsJ1de3rJf23nbXx4YRVkDo8eDTFRAAkAnBOjwWH/gnXaiuS78aZ9gP1WsaXR/d9o7t0tzZnBCqWzvKjxSq64Ua5RaKsAJi8MLZL2WFNFTvifZIbUWt8X1frC8h2PzxGz9OCEf1Z3rcmEG8NsDVWv26ikLD1IvmXES4HoTGyqHB+pAeNla2fkhDY+WArICYeOTA9zRWhpOyQmb4RFkhuIHGyihkWaEpR9kvK2SWk3FRd7TbmKkeiURyCtV10sKa3WtcHxeCuwICALIhWIf3ahvTh+q69EuDdTuhup5Mm9e7OiwNP7d0bJHqwUaWuYTqq7aukt4YJ3VfrIAoYlkhDcg1VD952snybve7xnFlmlE/Qz5z5GcSal9rmY4x1WOGGuBqrX49rvZ175MntzxplAIhWA9AY2XrDC2PGiu/t+m9thsrM/PKQz5eAYGA8XljZQQQKyDgl7JCh5wssvZ+14cWjw9M5EoO1XXii51Q3boSEB7y6QoIABgJa4zhLZ1ZPGl++lBd2QnV9YQ6bbHrQ7POKM4lVNcLtV2du1wfF3JcAeGDskIaqusKCP2wRr+OGHuEzGmaI+NHjTe+4hKXP+z4g1SGK2V05Wg5rPEwY3b6lPopxs/rK+tZARG0xsoFKCtEY+UAorEyyqSxMgKGxsrwU1mhAjZWbqhqSAnV1+9Z7yhUH1M1MOkFXqCxMoDg4iob3qqsERm8wEkJ1TWkthOqW0+oBZAcqmvjGjuhunmhFuY/K3+sgHASqhegrJC5AsLECogyaKxcgLJCisbKAUZjZZRwY2UEEI2VUQZlhbS2+pyxc1JC9daeVtuh+qIJi1gt6iUaKwMIMN554LFQ5lBdl345CdWj7s+W0TIdyaG6dpS3G6rrhZrWxkaAVkDoMWWdHeESVkAEtKxQPisgClBWyIrGyiWyAqLIjZVRhmWF5p4jMljmDjDQWBlulRVKDtV9UFaopqImr1BdJy3MGz/P9XEhuCsgACAbrorgvVg0fajupElJxy6R/e5eiOmF1tT6qSmhuja/sRuqnznjTONf+GAFRBHLClmxAqJEGisXuayQNlZODtVprOxjPl4BgTJsrExZIdhZAWE3VGcFBAJSVig5VNdG8HZCdeukBXjMhysgAGAkJDbwWFyk+fX8QnU9oW5Y7vqJU+tc5xOq64XauNpxro4JeayA8EFZIVZAlFBjZUVZIbixAqKIjZVRpo2VARMrIFAmZYXSheoLxi9wFKpbV5+ifFdAAMBICNbhrb5uka7W/EJ144RauAud5FBdO8rbCdWtF2rwwQqIIpcVYgVEwFBWCGXUWBkB49PGygggGiujTMoKbdy3MSVUt9ZMHylU39O1h5V+XgrACggAyGTwihzwSDyaOVTXpV8dzfbrqekFvsv29+2XTa2bEkJ17SjvJFTXABUer4DQ2aE+Kivk1gqIkDkjH8ForDz2UJHm9QUbImWFSmQFROOMoq6AQEDLCumHf4qyQih2Y2XrtTrKW6ayQhOPLGpZoe7+bmnrbZPqSHVOobpOWnhiyxMSE8qKeMbnKyAAIBuC9RIVjcUlFh+cMZdkd3u37fupqYrImJrhgNAVkYr0obou/ZpylL1QvWHK8Mx3l2hjvw17Nxid5K2hutn8xk6ovr5lvbT3trs6LthYAaF/8Pm
orJBbKyBaOlsKNi4UoLFyAcOGTGWFZh802/ieskIBWgGhwbrdFRAF/KAGASsrVBHxbWNlBHAFRMtGGiuj+GWFZp8l8uZjrg8tOjiRK9dQ3TppAR7z6QoIAMiGYL1EQ/XuvqhUV4aNf818PRIOSU1lRO5/aavxfTwel5b9PdLbPxAqhkMhGVdfLVWR4RNSNB6XS0+e6WK4HhKZvDB9qO6k8/chJ4usvV/c1B3tNsLPSCSSU6iuF2prdq9xdUzIYwWED8oKsQKiDBsr+7Ss0GkHnyZPv/O062NDARsrF2AFhNaczTYDz8mHeTUVNQlhBYrUWNnJCgiXZTueOJZ8ihUQ8FNZoVDhVmamC9W3tm9NeG8aKVSvClOO0VM+XQEBACMhWC9BOlNdQ/WDaislNHrgxNNQWynvnTJGIuGwnHvUVOmNxuTxN5qlszcqepOqirB8eP5kGV83XF7l2b+0yAub9kp3b9S9YL2yVqS6Pr9QXU+oXYMBhYv0gwbjIZJCdf3D0U6obp39AA9lWgFR5LJCrIAIGsoKwWeNlV1eAaHnsgN9ByQcChvHhfW40vcr/XD59ldvHwoTdIWD3tb8udaaNRvgalmhKXVT5BNzP0EgWuzGynZXQLhcVkiPp67+wffLQdbj6hd//oURTulxY4bvmY6r0VWj5bL5l3EsFXsFBI2VUSJlhUKhkMxtmpsSqm9u2yzvbXqvrVB94qiJXIN7yccrIABgJATrJUpnqmuoXl0RkYNGVcqCgxulIjzwh0zjqCpZvna7dHT3y+jqCiOEP+/oaTJxTM3Q77/89j7ZuLMAM2Wty7OSQ3Vd+mUnVDdPqAUo39FQ1ZASqmtH+al1U22H6mOqxrg+LuSwAqKIZYUUKyAChrJCKPHGyhpuaqC5cMJCmdU4K2FljX4IqMeVHlMaJiyZviThuFq1dZURoo6SUcZxdeykY+W13a8ZdWwJQwPSWNnl1Q9mWD6udpxxrGi5KuvKmhMmn2DUKB5TPXBNlOm4erf7XeOLY8krNFZG6ZcVqonUSF1lXUqobhopVNdJC8dNOk6Wb1ru+tgQzBUQAJANwXqJMsu/JIfq/bGYEapvf3dgllGmUP25v+wp7ADThepOm5QccHdmqM4snjN2Tkqorh3lNVi3E6ovmrBINr670dVxocArIApQVkixAqIcGytTVgj+XgGhx4WG6maAqceEHld6/tMPAQ9rPCzthzV6/I2uHJ2wAkKDdXjA542V9Vg5YuwRKT0g/rDjD8bsdf3KtLJGjystA0MtY4/RWBklXlbIXBWTa6iu71f69x+KwIcrIABgJHR3KGFa/iU5VF/3TqujUF3vw3WtW/MP1TeuGP65S/SPu3Shut3wUy/U5o2f5+qYkMcKCLtlhczjqgAyrYBwEqqzAsJD2Ror+6CsUHKoTlmhAKyAUD5aAZGtrJCTFRAIWGPlAsrUWDk5pKKxcomtgLC+n6G8ywopn5YVSg7VZzbMtBWqm+9Xev0Fj1dAJIfqPlgBAQAjIVgvUdqoVGuqJ4fq73b22Q7Vjz10rNS71rR0kF647N2UX6iuJ9S2He6OyzrEpFBdm9/YCdWtF2rwmA/LCmVbAaHsroBgabxXXGisrGWFRk8oWFkhRVmhgPB5Y+V0oTorIHyshBsrnznjTONfBKix8rTFHg8a/uTvskLbO7anhOrWDwFHCtX1/Up7QMDjFRDJoboPVkAAwEgI1ktUTWXEaFSaa6h+4nvGycKDC7DM07rcNzlU10+Y7YTq5gnVOlu5gKG6dpR3EqqzpNljbqyAcLmskGIFRBmWFZqzrCDvS5QVKrEVELZC9cKtgNDyG+lCdVZA+LysUD6huo8bK2uNdgSssTIwUlkhJ6F6AcoK9cX6ZEvHlrxCde0BYTbshgcCsAICADIhWC9RZuuO5FC9MhyyFaofc8jYwg4wXaiuS7/shuq6vc79maEb921MCdWtM4ZHCtX3dO1hdoOX3FgBUYCyQglDZAVEMLjZWLk
AKCsUNP5cAeFGWSFWQHjMp2WFrCgrFDA+a6yMgPJxWSHrJKdcQnV9v9rVucv1cSG4KyAAIBuC9RIWTROqHzX9IEehem+0AH+INc1KH6o76fw99xyRiLsz+br7u6Wtty3nUF0v1J7Y8oTEhHp8nsm2AsKnZYVYAeFz+TZWpqwQfLwCIltZIVZABLSxcpHLCmVaAUFZIb/y7woIBEwAygolh+r6XmMnVDffr8JEJd7y6QoIABgJZ4sS/sz3jzvaU0L1MZZmpCOF6i37e2RPh8szUrSOZuP0/EJ1PaHWT3R3XBoqDP7xmmuobr1Qg8d8WFZIsQIiYCgrhBJvrJytrBArIHyMxsoooxUQCAp/lxWisXLA+HgFBACMhGC9RHX3RaWtK/dQfVd7tzz6+k6JDf4R7hprg6rkUF2XftkJ1a0nVJelCz+1o7yTUL0qTBMuT2VaAVHkskKsgAgYygqhTBorZyorxAoIv/JnWSFFY+UA8nljZQSMT8sK0Vg5gAKwAgIAMiFYL1HRWDyvUP1Xr26T3v4CBnrpQnVd+uUkVNf7cFEoFJK5TXNTQnVrR/mRQvWJoyYyu8FLbqyAKEBZIcUKiIBxo7EyZYXg8xUQbpQVYgWEx3xaVkjRWDmgfNpYGUHj37JCNFYOIn+vgACAbAbX2qAUVYZSQ/W39xwwgvORQvWevoFQvaqiAJ+99HSItGxMDdWddP7e9opI9+BSVpfURGqkrrIu51BdL9SOm3ScLN+03NVxIccVEHbLCoXMVr/erYBo6WwZ+p4VED6TqbHyxCOLXlZIZ4cqygoFaAWEGT75ZAVEtrJCTlZAWN/DEIDGyl2DAUUBZFoBMbVuYLYoZYUCtAJiylH2V0CY5WRGcMHKC1wZ9UNLH3LlflCAskJ6nivBxsqc54rApysgAGAkzFgvUZoVHjm1MSVU/2vLftuh+qSGGhlf5/KMFL142rk2v1BdT6j65bKw5Y/XXEJ164UaPObDskKsgAigvBsrU1YIg2isDLfRWBklvgICAePzskI0Vg4a/66AAICRcFVUomoqI1JXW5FzqD71oFr54JGTjHDQVX1dItH+9KG6XvjYCdWtJ9QCSA4/taO8k1Bdm3nBQ7oCwmdlhdxaAbFk+pKED3xQQJQVQpk0Vs4UqrMCwsd8WFZI0Vg5gHzaWBkB5dOyQjRWDiAaKwMIMBKbEhWxBOK5hOrnLJwqVZFCHB7xzKG6Lv1yEqrXNLo+uu0d21NCdWtH+ZFCdb1QI2zwkBsrIApQVkixAiJg3GisXD+xYMOjsXIA+bSxspYVyidUZwWEx2isjDJprIygobEyymcFBABkQ431Epccqh82vs5eqD5YW91sTOWq2sb0obou/WqcYS9U15Np83pXh9UX65MtHVukenDGaS6h+qqtq6Q31ltyjXBjGY6D3ZZ6/SOpqYrImJrKwqyAqIj4qqyQFSsgAnQ85dtYuaPZ87JC7216r+2yQqU288rXx5IbKyBmnyXy5mMFKSuk5zlWQAQEjZVRiBUQbdt8twICATvXuVFW6JCTRdbeL26jsXKJrYCwXl/TWBmADxGsl7Bt+zplR1t3Qqh+yLjRtkP13mhMWva7PMMpFBGZND99qK7shOp6Qh17qOvBuvUPu1xCdb1Q29W5S0rtYr6rt19GVw+/Vehx0ds/cMx88/GN0lBbKfWWC3X9+Z6OnqE/AvR40lr9+u8n33eIywFW3NdlhXQFRHPn8MVgua+A0ONpf3efRCIho1yVdWWNHjd6PJlGOq7G11fL5087zN3jyY3Gyj4tK1RqjZX1WOru6zc+dND/r/RwMo+r+1/aamzr6O6Ttq6+rMeV/v4XTnf5WPJpY+VsZYVorOxzNFYuqmV3POfK/ay44kQpKp82VkaW66aePgkPng8i4YHrJ/Ps8LMXtxh/q5nX5Xq7cfXVCauOzfPg2NFVcsUZ73HvXOdGWSEaK+etZN6bPG6sDABuIlgvURoWvLX3gFTrTN5cQvX+mDz
+RvPQhZprKmtEBi9wUkJ1DRPthOp6Qi3QzNB04ac2rrETqpuzH8IlVGFJQ0wN1U+bM8H4Vz+s0ePK9IH3TpKFBw+X5NGL+0df32mEV2YDXK3V39HdbxxP3b1R92eGZloBoasfilhWiBUQ6Y8nDdVPmz1BJoypGdpuHlcHjRr4w/3YQ8dmPa7qayoGglU3jyezrJB++FeCZYXM2aSldCxpqN40uloqK0JSGQoZDbvN3iLnHjVV1r7TKv/31r6sx9Uv1rwjHV39hXlvyrexcoHOc5lC9XJfAeFrPm+snM8KCMoKecznjZWReq7TsHxCfY2Mq6+S904ZI5HwwDVBNBYz/s7r7I3K6KqBiSwfnj/ZmMxiMs+Dert9B3oLc67zYVmhbI2VNVi321h547vDEz5QYD5eAQEAIyFYL1HWQDyXUH352u3SbJnt7p5Q5lBdl37ZCdVNUfdny8yon5ESqmtH+dkHzbYVquuFWlNtk5QaDdX37e81VkBYP6w5872TEo6r1X9ukcpI2PiyHle6VLUgZYWyrYAoYlkhxQqI9HSmlYbq5h92Wq7KPK70OEvXAyL5uHrfrCb5xSuWZeyFLitkdwUEZYU8pR+uaKheV1UhR00/SMYMfvCituzrlD/uaB9abZPpuKooZHPgTCsgithYOVtZoXJeAeFrPi0rpGisHGA+bKyMzDRUP35mk1QMhur9sZise6dDph00yjjPVVeG5byjp8lEy6SFl9/el3AerKkMl01ZITcaKx9cfzDBupd8vgICALLhqqjEJYfq7V19tkL17e92Gd+bSw9dFYumD9WdNCnp2CWy390LMb3Qmlo/NSVUN5vf2AnVz5xxpvFvqdEZxcm1+oteVmikFRB2ywpNWyyFxAqIMmysXAA0Vk5PZ6onh+p6XD33lz22jyvzfaocGiu7UVaIxsoeo7EyyqSxMtLT8i86Uz0xVG+VdzsH3gcyherW86Cu2LKWQXMFjZVRCD5cAQEAIymtxAYJDm0anRKqv7b1Xduhum7XOn3uios0v55fqK4n1A3LXT9xVoYr8wrV9UJtXO04KdWyQibflBUaaQWE3bJCBZRpBYTdUL1UV0AEsrGyD8oKmSgrNEA/o9HyL8mhupMPa7RclXXZvOsrIJSPGitnKytU7isgfC+fxsoF4sYKCC0rVKrnuZJdATH3HJHBMncoPKN3SB6hup4HrWXQyqWsEI2VA0hXQCSH6j5YAQEAIyFYL1EaPk0bOyolVO8bbPI2UqiuF2pap8/1maF93cNNRXIN1Y0TauEudJJDde0obydUt16olRL/lhUaYQVEkcsKsQIiYCsg3GisrMeU9f3MJZQVyhw2mDXVc10BoT0gNBx0n78bK6cL1VkB4WNaVihdqF4CjZWXTF+S8IEPCsyNxsoFXAGBVOYZKjlUrwyHbIXq1vNgOZUV0sbKyaF6KTZWLhkBWAEBAJlQY71EWQPxXEJ1vVArRNQgg/U404bquvTL2qxtpHpqeoHvsv19+2VT66aEUF07yjsJ1TVALUW+LCtkroDQ2aE+Kivk1gqIUGH+Kyyqkm6sPPbQgtTrH3oIygq5XlaotbOAs9F82lhZywo1dw6fa1kB4XM0VkYh+LCxMjKLDtZUt4bqWgbNSaiu11+elRWaeOTA9zRWRomsgACAbErrr2ykSA7VDxpVaStUt16ouS5SkT5Ud9KkpGGKyGh3L8R0WfuGvRtSQnWz+Y2dUH19y3pp722XUuPPskIurYAoQFkhK1ZAlGFj5QKgrFDAygr5dAWEG2WFSnUFhG9lKytEY2W4uQKiiI2VkZmeobQJaXKobi2DNlKorqv89nT0lE1ZIRorB5hPV0AAQDa885Sw/V39KaH6goMbHYXqHd2DF0euCYlMXphfqK4n1DnLXD9xdke78wrV9UJtze41Ump8W1ZopBUQPigrpCsgkkN1VkCUeGNlygp5ypdlhXzcWDlbWaFyXwHhXzRWRuk3VkZm3X1RaevKPVTX8+C
jr++UmNsfIvu8rBCNlQOIxsoAAoq/ikpUNB6XN7a3poTqZkd5O6H62ndahy7kXFNZK1Jdn1+obj2husictZgcqmvzGzuhuvVCrZS4UVaoIM0BR1oBYStUL1xZIVZAlGFjZR+XFaKxMo2VrdKF6qyA8DEaK6PEGysjs+jgNXeuobqeBwtznnOhrFCB0Fg5gHy8AgIARkKwXsKzG/riuYfqeqH2f28N/sHmJuss8+RQXZd+OQnVC7B0uKGqISVU147yTkL1MVVjpBT5sqyQGysgClBWSLECImArICgrFDj+LSvk78bKmcoKsQLCp3xaVkjRWDmI/N1YGelVhlJDdS2DZidUN8+D5vmvHMoK0Vg5gHy+AgIAsvH12aKnp0euvfZaWbx4sZx44ony4x//uNhDCgxztV9yqK4d5e2E6tYLtYJIF6o7bVJywN2ZoZFQROaMnZMSqpuNteyE6osmLEqYHVEq/FlWyKUVEAUoK6RYARGwFRAjNVamrJBv+bKskE9XQLhRVqhUV0D4lhuNlQtQVsiKskIlsgLCSahegBUQSE9PUUdObUwJ1Z007J7UUOP+tZOPywq50VjZfL+Cx3y4AgIARuLrK9lbbrlF3njjDbnvvvvka1/7mnz3u9+Vxx9/vNjDCoyG2tRQfd07rY5Cdb0P17VuzT9U37hi+OcuqamoyStU1wu1eePnSanxbVmhkVZAFLGskIkVEGXYWJmyQp7yZ1kh/66AyFZWqNxXQPgXjZVRHisgkF5NZeT/t3cfYHYWVQPHJySEBEIoggKKih0BBUGsnxVUsPcCYsOCYu9i7x0RFBS7oCKK2EBQQUSsoIANCzYQVJAikEBI+Z7fJLPMvrl19+7uvZvz1/uQ3fvuW2bOnDnnzJnzpkUL5004qG4efMiOW+XyKOtaWaF4sfKIMaQ7IIIgCEY2sL5kyZJ07LHHpoMOOijtsMMOac8990z7779/Ovroo2f61kaCuevNSTtss3itoHp5o3wvQfXdt9s8bbxgwIF1hst/z59cUN2EeuVFg72v+hYbQXUvv+klqF4barOJoS0rVDOEZYViB8So7YCIskKjxtCWFRqBFyu3CqrHDoghJl6sHMzyFysH7ZlbBcQnElTP8+BUzHNDXlYoXqw8YgzxDoggCIKRDayfd955afny5WmXXXYZ+92uu+6azjnnnLRyZawe95LdMHcSQXWG2s7bTsE2z6oe51pBdSvMvQTVy4Q6BeU7WgXVvVG+n6B6XXN0NjD0ZYUGsQNiwGWFEDsgRmwHRJQVGjmGtqzQkL9YuV1ZodgBMawMZ1khxIuVR5HhfrFy0J5mUF0ZtJ6C6mvmwWJLrAtlheLFyiPICOyACIIgGLnA+iWXXJI222yzNH/+DVksW2yxRa67fsUVsRLZjZLb0Ayqe6N8L0H12lCbEloF1W396jWo7veLBp8Zet5l560VVK8zhrsF1S9deumszG4Y2rJCg9gBMQVlhcbdYuyAWPderDwFRFmhESsrNKQ7IAZRVmi27oAYWoa0rFBNlBUaMYb0xcpBey68bMlaQfW+Xti9YmW65Orr1pmyQvFi5VFkuHdABEEQdGLOqilZvp48xx9/fDrkkEPSqaeeOva7Cy64IO2xxx7ptNNOS1tttdXY73faaae0YsWKtPXWs/ulFYymXsg9OielhevPTdevWJnWxBpysJ2BtWiDG+r0CThct/wGZ0vAoWQBrly1Kl2zbEXaeIN5ubRMR666uMcbW7V6QqyynPLvi4HOqDGRrr/h6rf15HtfldL1S1ZPrOVJZJcuvzYlQe+ypbUFjKdeWLVmMveymzlrliU4fuXfG66/YTbAakPN9/X2ZQGLa66/JmeELN5gcS4FMhtkSctsOH/umtqMq8bkhlyRFb9eOH/uuK2qTblaf+6ctHzlqix7A5GlcnPuRzu7NrkpRli+6MKUll2z5p59P3e1XI0tO62Rq7zlcFVKCzbrKEv9yhPZkbUu0FDXaCQzG61/gzP
USa783ZLrl6RF8xfNKnkiK2LpxeEraLMVaxRWJ7mim8piTUd56kuW1ugVJTzGdE26Qa7mL1pz7IpKrtJ4fSVYsWzJ6sz3AcoSFs5bOKaPSjYW+ZAx6vcL11847mVdTbly3PJVy7PszRZZ0hwL5q2e54ohQxzI1UbzV+sCv1+6bEVbuSJLV1+3Ii1eMGjdlG6Qi3yh5TfIlXlu3oKU5m6wWh2tasrVGn3lGDqqyzzXqzytWvM//U9myEudcQx63qJN/ncHucpnGoG5rh90W9Etk8HLcLuWMp7oPJdlpjo5mRr7ec5q+8oxY3+/Rq7yHDQ185z5qgTUUeSK3VRYunxpW7kqtpMkhtmgm1BM11Zs2cc7HXqSpYnopmwrVTdY5KrMc2xyNnahqa9WXp/SsqsHLk+d6Gdng5fhDrye+EzOdeUl23NusKlXz1dz0kbz5+adgOa5cqzvzHOVtZuWXLc82+GDtZsa8xybo9i77Nj5G62Wj9K14+Rqjb4iiwOc5/KtrZmfLBjTRbVeyXKW/7/6phzDvqop+mpU5rnZo5tWrY4PNHfqkassG2suSK7q/qjlynnc2AabDCQ+MEjdVMcYZiticUcdddRM30YQzAhDG1g/8cQT09vf/vZ0xhlnjP3u/PPPT3vvvXf62c9+ljbd9IZtY7vttltatmxZ2nLLLWfoboMgCIIgCIIgCIIgCIJg3SIC68G6TJXeOVzc5CY3SZdffnmusz5v3ryx8jALFixIixeP385+5plnztBdBkEQBEEQBEEQBEEQBEEQBOsaQ1tjffvtt88B9bPPPnvsd2eddVYu+7Lemnq8QRAEQRAEQRAEQRAEQRAEQTDdDG2EeuHChelRj3pUevOb35zOPffc9L3vfS996lOfSvvtt99M31oQBEEQBEEQBEEQBEEQBEGwDjO0NdaxdOnSHFg/+eST06JFi9KznvWs9PSnP32mbysIgiAIgiAIgiAIgiAIgiBYhxnajPWStf6e97wn/epXv0qnn356BNVnKf/973/zi2rvf//7pzvf+c7poQ99aPrkJz+Z6+sHQT+ELAVTgZdjf/nLXx77+alPfWo69NBDp+36t7/97fNLu4N1h6uvvjodf/zxPR1Lvz3ykY+cVpkMhmO+O/HEEyf89695zWvyB2SHXgvWTUKWgkES8hQMkloe+rHV++W4445LD3jAA3o6NuQyCIKRCqwHs59///vf6fGPf3z661//mt73vvelb33rW+kFL3hBOvroo9MBBxyQVq5cOdO3GIwIIUvBVPHtb387HXHEETN2/R/96Edpl112mbHrB9PPZz7zmfTVr361p2OVyTvvvPOm/J6C4eL9739/Ou2002b6NoJZQMhSMEhCnoJBctBBB+XPMNnqz3zmMyOZIQiCccwb/2MQTC/vfOc7001vetP08Y9/PM2dOzf/btttt00777xzzjb+4he/mPbZZ5+Zvs1gBAhZCqaKma6YtuWWW87o9YPhlbm///3v6XOf+1y6zW1uM+X3FAwXM62XgtlDyFIwSEKegkGy8cYbD53cbbTRRtN2rSAIRoPIWA9mjMsvvzy/lPbZz372WCC0sM0226THPvax6Utf+lLafffd06mnnjr23YMe9KD06le/euznD37wg+kVr3hF/vcf//jHvDXrTne6U3rwgx+cs5ULVpZf/vKXpze96U3pLne5S7rHPe6RjjzyyGl51mBqCVkKeuGss85KT37yk3OZIAsu5OU///lP3v6prz/84Q+nu93tbmm33XZL73rXu7KRrgTLa1/72vTPf/4zl2S58MILx3ZI7L///mmnnXbK8vHjH/947DqOO+SQQ/K5nve85+XfKWnm2q5rq6mFnoItrkoYOZa8eXH3L3/5y5alYJYsWZLe+MY35nP7vOENb0jXXXfdNLZi0A8XXHBBLmNH5h7+8Ifn0lT6n8yRB5l9diPc7373S8cee2z+G98ddthh6ec//3nu+06QhRe+8IVp8803X+s759l
rr72yTD3mMY9Jv/jFL6bsOYPeoD/06Q9+8IMsB/re2Dff6CP64bnPfW4uBQTzVjmOjvrDH/4wNgd97Wtfy5+ydf3Pf/5zfheRY+mlpzzlKen888+f0H2SxYc85CFpxx13zHrmLW95S1qxYsWYvrIr7CUveUmW67333jv97ne/SwcffHDWnfe5z33GlYFop3eDyRGyFLI0SEKeQp6GVd7qUjD/+9//ss2jP+9617tmn80xrWx1cvm2t70tPfCBD8w2luP66fPvf//72R4ns673spe9LF1zzTX5uygFEwRBkwisBzPGb3/721wbltPfCgFLE6yJU4ChBLP+8Y9/jAs6nXHGGen//u//0rXXXpsnyF133TV94xvfyAHTj370o+Pq1J500klpgw02yAYfI09QQ+mQYLQJWQq6cdVVV2Uj/V73ulcuEyTAqf/tcCiBb/0n4C1YLQtYsJyx/7rXvS5ttdVWuSTL1ltvnY8nC5w2W085eK961avGZctYwHEuRj8H8mlPe1qWP8FOToH3h3z3u98dO56TKuuYPDnuOc95TrrsssvWeo7Xv/712TEgj0qA+PeHPvShaWnDoD/oJDK3ePHiXNZFnwqYF37961+n3//+9+mYY45JBx54YA4QkDFyZZsx2fNzO5zTosoTnvCEtb4jZxxK1yer97znPfP16b1g5qF3jGF99PnPfz73v8Vaeunss89OX/nKV9Ipp5yS5YU+ohfMR/vtt1+68sors3xYNPFxrFJnFubs2vr617+e9YlgkyBTv5gjBTgEEb7zne9kuXQNQYbCZz/72bxQbX7cdNNNs35TV5ksC5JYdHZP3fRuMHlClkKWBknIU8jTsMlbjQSYSy65JNvX7HRl8Px9O1udLUTWyCsbvdc+9/sXv/jFeRHIYgw7m08wmRruQRDMbiKwHsxolnGn7VSbbLJJ/u/2228/lq155pln5gnxoosuSpdeemk24kyqgqHf/OY3041udKOcqXDLW94yG1CMORNvgZElSHqLW9wiZ5v6+Te/+c20PG8wdYQsBd2wWPL85z8/191XIogjaMfCn/70p/w9R49hf6tb3Sq/CPIOd7hDDnzOnz8/b0O1E0JJlrIjQpa6rJqb3/zmeRGGoc95KzzxiU/M5xIsZ4jf8Y53zM6g3z360Y9O++67b/rEJz4xdrzjBOFvfetb56wbMnvCCSeMewYyypmUpez+d9hhh/TWt74178oIho+f/vSn6eKLL85lqvSvjHX9XpgzZ05673vfm253u9ulxz3ucblkFVlZsGBB2nDDDdP666/ftgwQWbPDRv87TxMOqmwq2VZkjmy5zlFHHTWlzxz0Bl1ExzzsYQ/Lc42+Nx8Z13ZA/eUvf8n6QRDAy7jNQ+YjwSkBI3MdOfGxW4F+e9KTnpSz+ugkuoGekSnaL2TvHe94R9aPN7vZzXJ2KP1VdCUsJgo4mP88w9KlS/OiH/1F7ugq82o3vRtMnpClkKVBEvIU8jRs8lYjI52M6X8+nd2hdiW3s9VlqkuuIhf99LnFF3IjccG17n3ve+cEhZCPIAjaETXWgxlDIBIy6ExaTWz3AgPMarTsAlvZTbgCqTI1YcsX483kKzBav+RPsKwuDeI69c8mZ1mFwWgTshR0g6EtyOilkLKEOXW2LjO4waBftGjR2PH+3ak/GeX1sahLsnAyCzLWm7spyJbMrUK5D6y33nrZWWxulVZPmxxyTAu2p/oEwwf52m677cbJla3HdjmA40/uChy/WiYKFgEt3hQENMp2acHyVpAdzmONa090+30wWGr9IQBV6ws/L1u2LPeVTDsLKAU65m9/+1vLgJPt7XYnWOA1hymBsMUWW+TvLcZZMC4UGWwFOXQPMgOLnqR7BBYK9TzrWNfxX9jJBc/guE56N5g8IUshS4Mk5CnkadjkrcbOCMFxQXcfSS6SFtpRn6+bH1BjwUiw/vDDD8/BdB/HS7wJgiBoRQTWgxlDcGjevHnZ0GoVDFWaQVD
CpCgAIbjgIzgqy1gJD4acDGMIgplkGWntkAHYJF6yM/qELAXdsOgiq4WsyDqRhaK24znnnJO/Z0D305/NWv7N44sD1/x3nQ1T6oKC/Nb4ToC9m8wFwwsZacpQ/XMvfV6CCXUZKrsZ1JbldJYMdJlY9JwdDQITrWTO+cldMPM09Uerftdftrabi2rqhZqCuq92PWy22WZ5h5XsPwEs5aJgS7uSZYUb3/jGbe/t9NNPz4syAhDmRP9WcqGmKbut7r8XvRtMnpClkKVBEvIU8jRs8lZD5k477bRc/kd/8dOUflGOsxW1LdRPn0uusiBEZiWveFeOMkNBEATtiMB6MGPIDN5jjz3SEUcckf/LGLJ93YSpFqy6feXFf7IRvJzSFjCZnLbX24p4xRVXpHe/+935GIFTE22dSayen3IOtnMFs5eQpaAb6pkLSH7sYx8b+x0Z6WUxpFWpjX4gT80XR5bFnoLsmdppZdTbwtrM7CGPvitZ6mT5Ix/5SJbxYLi47W1vmzP4vDCrBBy8D6Ig007QoZSwsjBYMtBrmRNAtyBYc/LJJ4/7WakXL+N6xjOekX8mW5xF+rDg59jdMDrow3/961/j+l6ZKH3qZWxkpOgvtYe9gE3mZwksCTaU7+2MqHdHdHs5oOCDWsRloVm92bvf/e7TqneDwRGyFAySkKdgppBtbnexxCgfiQRkrxdbvZ8+5/N539EHPvCBcTabkkJBEAStiBrrwYxy0EEH5bIctrnLIJaFt2TJklwHT3kPW75KMNQk583csjYFBwQoBENtb8cjHvGInLVn9do2RUFVtfh6NdiC0SZkKegEGbA74Sc/+Um64IIL8suKBCeb20xbsXDhwlyXU5B0IuV+1PsUOLdt2gtSBcG/8IUvpH322WfsGM6nDC6ZXGRNXVD1Q2sEZ2Vq+f7cc8/NCz0HH3zwhJzKYOqRWeUFWl7wRo/IJq/f00A/CRD4Tm1135OVInOCERdeeGHLcwto1B/Bdw5j2fYsu0o2u0x3Mieby4KMzMFgNLBIIkNOHwoeKb3gJWrFsScjFohl4dFv5MlCG5kRgDr66KN70m9NnMvCny3ytr+rjewdEhM910T1bjA4QpaCQRLyFMwUFnS8W8aLTdnkJ510Uk6S6sVW76fPHUvO2NpsKIlXbO6QjyAI2hGB9WBGseVPQEH2g7eAc/ovu+yysW2BsowFF7zh3Uq0F41AzTwvwbGVq2RACDodeeSReUIVfJJZLHClHm0w+wlZCjqx11575QWTF73oRTnjyUtsvXxWULOboSxwLXipjmOdWd4rXi4qQ8Y2ZudQs5FD6D4Ktpt62SV5U3/005/+dFq8ePFa57L92oueOLYWkSwgvfSlL+37noKpx5bmQw89NAcX1OX0fgd10UtJH0F3NT/pKrtmBCeKXtpzzz1z2RYv8qpfitsre++9d5YLtWjJfVm4iWyr0aHuQ+UTBAPoDrVfQaY4/PrXonApi+Dn4447Li8Mkx3y1w8HHnhgXkT2AmZ6xlZ6W+Inovsmo3eDwRGyFAySkKdgplA6SE30Aw44IMuZRRu2Uy+2ej99LimL7EpSkPAgIE+O2edBEAStmLMq9jwFQ4rJ8phjjskGlJffBMFECVkKhhlBdpRSRMHsQOCAE1be3QABdDtgbGE+7LDD0imnnDKj9xgEQRAEQRAEQRBMnMhYD4YWAVAZCREIDSZLyFIQBDOBrCplf2yL//GPf5y3zzdL/ARBEARBEARBEASjSQTWgyAIgiAIBowt6x/60IfSF7/4xRxM9x6Ifffdd6yOehAEQRAEQRAEQTDaRCmYIAiCIAiCIAiCIAiCIAiCIOiDyFgPgiAIgiAIgiAIgiAIgiAIgj6IwHoQBEEwaX72s5+l29/+9lN2fud2jYnyjW98Iz34wQ9Od7rTndKTnvSkdO6553Y8/rvf/W6+Zv150YtelEaNCy64IL8ssx2HHnpoeupTn9rTuZYtW5a+/OUvD+S+fv/736df/vK
XAzlXEAxCj/QzFqaaFStWpPe///3pXve6V9pll13Si1/84nTppZf2NEYf9rCHddWVNqt63vvc5z7prne9a3rJS16SLrvssrHvm7rv7ne/e3r961+frrnmmo7nfcADHpCOO+64NIgX/5544olpKrj66qvT8ccfP+nzzFZZ+ve//53nut133z2/ePld73pXuu666yZ8fjJRy9Juu+2Wz6+PO6H9tOOw9Peg58TZKj9///vf07Oe9ax87P3ud7/8wu5ONHVN+ZQ+I3uve93rstzc+973Tp/61KfWsjvL5453vGOWt27XvPDCC/Px/jtstkx5losuumgt+01ZOd+VceHF9z5TbbvR6dq1He5nxx13TH/605+mbE4YBL2213Tawq3m5qOPPrrjMf/73/9yacF73vOeeW72TH7XZPny5emRj3zkWnrU+37YCXe+853Tfvvtl+WsHhetPr/4xS8G/KRBMPuIwPqIwzB9+9vfnu5///tnBfnQhz40ffKTn8zKdDoxEXz84x/PE+hd7nKX9LSnPS39+c9/HkjQqhdFf/7556dnPvOZ+dru4YgjjkgrV67M35nQ67/ZYYcdcr3bbob2IAOFP/nJT/I9TkW/77TTTvmZZ1u/F/SjF/494hGPyDLumT37FVdcMWVG4kT49a9/nQO27lEAtylfP/jBD7KBw9l4+MMfnr7//e+PfccoqtvEOXoJ/g7K+erFkJsMAiTdnOip5Mwzz8xG6POf//z07W9/O/fBs5/97I5BInJM1n70ox+NfcjdqMEh7SZHvaLt6NZB8IIXvCD97W9/G8i5gmC2YV494YQTco3+Y489Nl155ZXpVa96Vce/EYB62cte1jK40eSYY45JX/nKV3LAjO7/z3/+k3Vkc36h9374wx/mcU+PvPe9703TgfvqtCA4GT7zmc+kr371q2ldoR9ZYguwyZYuXZrl4uCDD06nnnpq/tvJnN88VGTp85//fD7m1a9+dZoOprK/Bzknzgb5Ya8/5znPSZtttln62te+lt7ylrekww8/PH3zm99se/7axvLZf//9001vetP0wAc+MH9P5/zmN7/JfsCb3vSmdNhhh6XvfOc7Lc/xve99L8vaRz/60XzP08FU2DLrr79+OuWUU9ay3zzfnDlzxn6ms5t6e6bk9Prrr8/9vS4wle0prvHWt7614zHGwXnnnZfHJt9ffMHCdxOLUI6rsWBDZh/zmMdkG2DzzTfPvhHdv/XWW681HgXgxRl23nnngT9rEMw2IrA+wsgqefzjH5/++te/pve9733pW9/6VlaWjOEDDjhgLLA8HXzpS1/KCvwNb3hDNmBvdrOb5cAV43yyQatuit41GHI3uclN8iRhwmGAWdkvbLXVVmN/e9JJJ+XjTUK/+tWv0nTw9Kc/vadss4n0OwNW1tls6/eC7Bj9+bznPS/L+Lvf/e6cHcL47pRFNZ1cddVV+bkFbMs4JF9nnXVW/p5hc+CBB6bHPvaxOeAuaO65aoNnr732GmsTDong+nOf+9yuGYLTZchNlH/+8585G7KdTEwHl1xySTYcLWxsu+22uX8szHRa7PLd7W53u7TllluOfRYvXpzWZeKVLEEwfVmir33ta3M2+W1uc5u8gFrmk3Zz6xOe8IT0j3/8o6fzC1rvvffeOSuZnjOf/vSnPx13zCabbJL1HtuKrWU+mqos8unUNeuaHutHlv7yl7+ks88+O2ep3/a2tx3LLmfXTOb8G2+88Zgsbb/99umlL31pOv3007PtNNWELE2f/PBz9O+b3/zmdMtb3jLd9773Tfe4xz066q7axrr22mvzwgu/gMwsWbIkB/MFjyVF7bnnnllXNRNByt9vs802aY899sg+4nQF1qcC405gvbnzgs8qK7+gjXyGQU6Nbfc3VbtDhomZ1CnGhDjGG9/4xrxLwLiwAGPRpfaJ7Rz53Oc+l8dsjfHk7yQj0vF0PT/t5z//eZo7d+648SiT3bXe85735MWeIAg6E4H1Eea
d73xnXtW3YmkSFjDiKB111FE5Q7MOLE81AoGUtKDpdtttl40qgat22+P6CVp1U/SCgjIorJTf6la3yoacQHadIVGfQ/DXSu3d7na3tbIeRrHf582bl8tbzLZ+L+U7ZEvJOCLbZFy/eXaBhK9//etpGLj44ovz4oYsHvcou57BUtqBU2q7ni13t7jFLdI+++yTn6MOUixYsGCsTcjxK1/5yuxkNIMdw2AkNrf6Mt6acKSe/OQn5wUDcMxkRbbbMdDc9i0riUOmnRiCzW2YHC/f+bziFa/ouIPBPVh0gjYlTze60Y3SrW9967Z/Q1Y5hv3sShG4t+An08qiU4F+svhk2+auu+6a+9bvys4YbfGFL3whb7sXwPK9Z4Q2efnLX54XDO0K0SZHHnnkuL77yEc+krdI0wcWoGSklJ0QjGVt2evOBm1tRw/DW9vSqxxr96kPGeBlC3Wna4Nja/eGNjF+Gf5wL87jfIPYlhsEU0G7sQBya2HboqFFUPL9u9/9Lmf4Ggvmg1q/F33oWGPcQmzRh62wECuIBLt93IsgeDuMc/coE70XNt1007yLykI9nSgDT0CsEwsXLkz90E13WVguu7zoPnqq/B3bwqfME3TOIYcckp+RnullDvn0pz+djzFPma/Yjv7OdbTXVJYuG1VZYn8oo7HFFlusFdQbxPlrWaozb7uh3/Tvhz/84dx+2kVAqNgu5h22qL4mZ29729tyBm2r/iYT2tu89ahHPSrbWE1ZaJaMYGvqv7KbUP+0mhPXdfm58Y1vnDPbFy1alPvGtfho3eShoH/1H1up6Ag7cfVrgQ11zjnndEwi2nDDDVM/kC2Z9fQEf4rdYuGnX1um2HN0nvvkq7QqP1KXARIoFSQt9iz4sWzK2n6jr/XnRhttNHae+txKgbzwhS/Mx1gEYRcbt63k1PmMEbYqG95x/cpFE77Nvvvum3cYtCpLUmAbF73sPv7whz+Mfef3EuQkIekHY9p9ey73xcaud2QJ5Ltn9+tv+/F/u9nek2lPz9TUCc0dxnbvPPrRj87PxWfU347nJ3YqFbXeeuvlbPnmfE0f1IlYZMr9y0ivMXY8U62LBectqDb5wAc+kBfsO/lKQRDcQATWR5TLL788T+wmPkHjGiv2JiWTF2NGYLLwoAc9aNz2yw9+8IN5ssAf//jHrPSLUVFnBHRzkAQUTQwFBjOjql02Sr9Bq06K3uQiuDN//vxxx3VyBCZieJmAtYlrM65M8LYnFgQXS2kWgXtB7vJ3MFlqR4Y+w1zWLMNL8LjpEDbr/zFmGc0MEYsBgoIm19qoY5DOtn7nWDPob37zm4/7PadPFrvnAgObM8io8RxNY03wQJYLw4kh08zq69YGsp0Fw7UrQ7eJxQLGpOd3LzJN7ChgjME1S3vXdMrWsmDSlOlOFJk5+eSTc8YOOZRhWALOnExZ9Ax3ciQ4oV1aGXLFWNe3+tw21/KdjAjGrvNynhhvHDiOHBiJ+swxanKWDArt1q2UQUFwyHiyiNRqGzf5NfbIouOMdTsAusFw9ewcFRketYNSQ4b1n90D5EF7Kk1QDO4mDFrjk+PLgXUvHN9SFoljqg4nY1iwxzioHS3Oi8VCMkze9GGd9eO7DTbYII8Hjp97cX+woGYRkV7UbnSDttffsrw8r597qVWrjyxYKCdBz3gGTo6yRc6jzcruHzuJOl2bztLfZNC56CbnJTfuxXmcr5dtzEEw3XQaCwVzkDnBHC5QrRQauTcWSnDFfEDPGwf0oUVWW7fNQYIu3TCvCzJZpO20CPWUpzwlj6deg9/sD3OMoJu5nb1Cr7ZD/XWZpPV83wuddBf9wH7TJu94xzuy/pNJT4dYDPXR5gU2jcBJq7m0CTuInnesa9P19LKgWAnA0mPTwSjJkkQHQaaCa9LzEgMGcX4I/uhrAahesm3rIBrZIQMWqs39agVDUItNb97kD5A7NZDb9bd5S9vZAdktwC/Aap7SJ/pHcFsftJoT13X5qXFdekk
7saO6YXHEPbG3692GduXWtjD7n13ZLpmC3WWhsF9dxT5TTtU93OEOd8gyps36tWX4ZGxFvp7M+W6U3a3K15T68YKf5Im/VOw3pTTZop36R3sZH8aGRQnnbCen7s+CDD3J5p2oXNQI5JpX2ISt4Bu5nrall/nA/I+SaAILM/xOAW9+Bf+J3JF581uZp9jR5Jyf5VlcW8KdduqVTrb3ZNqznW9RsDgg2YevZNGOnJB7yYLFVnfuekGpTsIyb9djwv3x0UoQnc9kjIhXNPFMFsBq2O7/+te/xv2OTAq2k4sgCHojAusjym9/+9u8ii8Q2AqOkmChCacEAgXRTJR1NvEZZ5yRjWgZS4L0JdArCGsC6TW4Y/XTJFOQ0eD+nG+yQatuil6GTVnlh2dhUHdyBJyHQc6I6gcTnjIy2ohDUMqYmPwFVhmvAmvaQ6CNUVYcQ3/LQCoOgu1Z7lPWTC9OsMnQajr0qeBv06ibbf3OkBEgboUALgcCHCkGqXZwnzL6BdJlgoBDrS/cn+flhBR6aQMODMPH37Ubc/AsvmcwWXgpNeksAjHUa6NKkFfQuhXaUP8ysnpxaJvOAcOTM6zuu0AunE/mkHYik5xbgWtGYStDjrFHhj/2sY+NWwxxjCDL4x73uHx/nCfHySaE9pe1oi0sdJRsc5lhJcOqG8YFg9lClcBLXS5ISRnPxrnU1oxJY4+814sprbCLgPFrazvHsFWGRnHwXIfhysgnExzxdvWFOaucLA6fHTEcOu1ON5Fh98bgdr8+/s3BsO2+XvTwLMalj74rkHP3ICOIXPu5LOpxCDh9dCA5U9KHkyIYQEfpIwGHMlY64TgBLgtWnsNCgW3H5FVbOF/Z/eO/na5N73gu+sF4pPuMK7rEvfj7XrcxB8F002ksFATYBI6MS/MDnWEcGwsWao0FpRHMMZxm87gdTeYa5+2lFrp5hL42VxhD3RIGekXwh4NuvhAwL3ZETSlvZh5zfXZOv+/06KS73IOf6QeBAjpTGwtKuDefOtvuiU98Yt7N1dza3goBRTsXBVbNXyUjtPQtvUiPTQejLEvmKv2udMtkzs82LrLkngUo+w3YsB8E0MmAa7GpyjxJlswlEovYwQKCsn7JUKv+Nkebb2u7rJMs6ROZsfrHnOdnfdKcE6eCUZUfgUn6RVKB3QXdcG7PwbYvFDuspvxc+w5ky4e/wK+jU3rxrWrIi6QoiTxseDtR+V0TsWXoOn1BHjuhX4wF+klfyBwugU4+EttS//vw20rd+VYYA3QnGWE3s8ktArSy3WBhy1jR5pORixrJNhK92PCt3u3DZjTu2fb0Mt9Bm/K9CvpAIN198X/Y7cae/xq3xW52DePAIgedIADP9+j24tqaTrb3ZNqzF1l3rDbXDuIKfB5jSgk2OHcvyVX8IXGHkrhkIYgPyB5vtWjYbkw1fXHtK/CvxE8QBL0xr8fjgiHMWEe7VdGimE0GpU6bjCSr0bY9MrAYmoI+JhJBIxO5SQ4UvUnFKqiAWO0gmUQYDbJFOUhKgNRYabdqLAjbynFpBq1kzAqcmdhbvXyjH0UveCpgJmhYG+2uWQKGJlIfwd26Vl0vmLhLxsAznvGMsSxZbWUCY0SZhLUjw8H9FMdQn5T+cizDjdHfDX0kEG+HQgkEclKsRjeNutnW74zKboE3AXuGBeOqGJ2cL3LCWGMgaj/ZbvqHceb+SxmgXtpAwJRh1wucMIYfo8a5yEmNoLTMCkZVbSS7D4sY0LelrmW3zIcmgsYl+O8lqcVI1N4cAUasPpWpJRisX2tDrlBeitvE4oi+d64CeXaf+peB68NAlDVRsiA8X6sFl1bIRGHgFwRSyg4T2/mNX7s+aow1mfX62WJAgbyWbY/60cf4IK+yGmVu1Ats2kzfyc7XLp7N8c5vYUufMO5LyRMyJTuKfJBjDpexzwD395whWYD1eOHs+o6cFPnmhNXOSf0iYjqldtq1te/pOe0r6GEHS8GYavUyLU5us21qOAR
0EodY1peFCmV/Wjmo3a6tDTga5N+zk3Xvhui3nEQQzAS9jAXjsuBYuqXM6UU/clYdZy6x+0aQqZzPHADzal2DuH73S9ELFvUEn2XUCTz0Q/P8FtnN6Rxx4xTmZP+mF0tgy/zs3+ZYNqd5lp6jy80DAu8F9pZdUE3a6a7yNwIA5ky6QuCuU7Db3NUr7q8EqaBvputlmbNFlgTVJRPYkWZX3mTOzy4pOwyVWCBDApRsejLRag5uwk4zN7aaJ92XhSHZqq5tQaWTfd+vLNX2Bht2OmVpVOWnJMWwZ+0coW8EEtudn/3btOvcezPgV36u/aeSCMNuZpvRK/SLRcNWdk+dEFSoE0iKnJEv9l+/tkzdH53Qj+651lUoY4XOZXtKxDEGjYF2yPzWvhY+fPi4xlIvY4De7SQXNRYB6lKr7N+astNIadBmGUe2Pb1S744iH7W9KrBf0Mf1ffqZ/V/O1Ux04ueXMozN8kHtAu7tbO/JtGe/8xOKD9p8H1un55AwZZ7mlxR9YBHOuCw6u0m7MVWXZdUGEsqm62XlQTBbiMD6iFKyD62kt5rAS30zgWCBHgFK2aoCrBykYtiUrUMCPIKttQI32dcOUScHqTaSOFuMrnalGUw+/QatelH0vmfsqkMnK7d20ATPGFjlOJOawKLtaDIpuhkK7QyvMsGb0ExiJl3GfDG8bIlrBeOol6A63Kv+ZmwU48OEvC70u+eutwi2wuq8AHGd5WLxgDPC8HI956kzRxj8JbDeSxvUBpOFinYBBQ6XvvGxxZDM1YF1BpOfBSo4SXVAUhC7bHFnaOorssnYYSRxPFuNi25GYpFR2X7+hqzaMmyBqJNT1M5IbCXTZNTYKAYhvWT7pHYwHji7xkPJNGmVRdGUqWbd93LdkvVum2iznJNxJSux1HWHRTiZM/qzNmQFt8kH3VDvTigOVTPD2/H6hTzKhiv3W+6LE6FckAUwH8EiY7BdxonnqDP4m8fVz9/qpUG+L38vk6a50FUWS2o4rc224bAVZJpb0OBgWXjzbzsD2t1/p2vrY86stqe3yYA+8+lWyzkIZppexkJTF9b6vIY+tNBG/8jEsz2bnSKIXRxhC1I1FgjpzpJEwBlmA5Skin5ont/irmzMuq60nUtKLlhULnOpa5f5hH53/+XdIHYs1Xqzlb7ppLsgS48+oi8lAsjYsyhurmhFczG30xzSzvaaCUZRlvSDOVwQrC7jMdHzm5tr24QNpuyPHWTKPrSag5u0mkuLLJWydWRJewjks9PaZdr3IkulT2ZalkZJftiAEoDqkiUSI9ihki1anR/0kWBuMyPbNV2n7g9Z5PynOhBYy5bEDn4Ke8cO3lZ2TzN42UlXTcSWacpXbc/VeqrTCyFLIgq73/2WOvftIP/GlHvUp/xaO0ztDO52j93kooaPJ4Gq0CwrAtfmD2ujpt1oAay5W7ce882dH+1kub7/Ap+y2Ka1Tunkb7ezvSfTnoOcn9o9h3I94iEWrMydBX6e4yzKwHjjo/N5fddK/stLhwvGsPsVOwiCoHeGx/IM+sLkRzHLvG0VYKVEBTsExxgcgoE+Aq4maRlLgkSlliIFahIxcbSjk4MEQVMBRopYfbV2k+FEglbdFD2jjQEtO9TfN1fZnac2vFzPZCOQqYRKK0NBZmwvbQCZC1bmlXxghHIWOCX+2yq7vpVBUFMH3Oprrmv97nmVPWqFjAfOWrvgsDYsLzdqBmnrZ+qlDer+ErBvBhTIikWPujYpZ6J2PBiupZa5bPjmC2U4ArWMWqiROcI48oztxkWvMipTX/CCgeij/dRSrOvJt3vmGgEWWTay0+uxWdet53hoF86ITHkBZo5T6Qf3WL9kx+/rl/y4V5n2xcnyXVk04tAxvC2mFEPQ4opxbIFGGzblTPaMgBFDtECuOItN3VAcWbpBO5WsJH3hvM1+K46eZ3R9O1F86BPtrc/du0UFDh84kJxMY3UigbICx9IYcH0ZVSXzxO6N8nLZGvffqSQ
MHca5siMG5E2/lnJEtbPQ7dq+1+4WO2UVycaxOKRtI7AeDDvdxkI/1PqwYNG16MNWNoLdX+btsvOOvjDHTOQlYs3zexbBBAuL5XyC7XRqp0xLc3tZ0OO4N/VmP5j3BW0FPy02+5iDZa0KrDeDUU26zSHuzYJ52XVFz5ZMyn5emrkuypIawbI+2QjKjtQMWlbJUqs5uF9k1etfOyp82EvK0vELuvV3sZncdwnwkaWSSFNkqb5nQU7yOx2yNEryo928U0ZAspyLv8JuamU7FUpN8WbpFLYC+WBjlp2Hkk4szHTyNcrz8AG62T3doCcnY8uQr9rOq33LYs+Sr/qFkpK+7Ahw3xJX+G8WIjsh29xiqb7yEURlk6KbnHaTixp2X6fM+TJm3K/Ei7qf2Lx2Otbj3T1aiOlU5qYVziVxrJUPXO5hMkymPYtOqeeo5vzEp6ixAKRsU3PXVqvnoNsE1d2Pkmc15KaGL2OxvCR5+Xe9a4Qvp9yXcVuPR/53t1hFEATjiRrrIwoDxURki1sJtpkEbeMT3KV0S9aPDFVZHAJLAkkmb0rVymsJBJqITN6cKkrchyFTsry7IStAMMn5bCnutArPGJH1VAfm6qBVCQr71FmznRQ9h0xQ3Ra/Xt8+z2DwYXiVbJry6TdDxWTOIGHomuisDJes417g5NYTcG14uR8Or8Bk6Xe1sUuWNCNXbdLZ2O+ykDxDc5FDkFpAWD8ppWHba10v20KLwKnnE6AWuBcMrq9b6LcNSkChfNy/TBYOXJ2Jw5kogVS13o1NBqZAea8164p8ot246BWLARZ9OJ8cJdsJyYOgdC/OYQlkkCvtbeFIO8vmUIex3hZbnAHOAbkt/V+2H1qcINPaWN/KzK93JqiTaPFBkIWMCZoX45zjS8ZliFvUEaSWsaF/2wWFZOsrhWRbO4fQbgF91jRICwLS9IzSLgLinERGrD5sBYeEY2JccngZ+xwl447zaScHp8w1ffzbexDabdXsB89g7Aniezb3bAGtyJ6sfr/Xz93Qb3SZLcD6VFktgfPSb/pYPzmfeafTtQXWyYgFB31skaLoonJf2rbdC8iCYKowBn/4wx+O+9TzUi9joR+cS2DE7hBjQdCP89vpXHa/WAike1zfzi61f+mS+r0OE8G8abHWPEBX0bHOz+Gu32lirHtmH+NbeQ5BxVYlwvqFfqUrZEbTAxZSJQEU/UDX0Bfm+lZ0m0MEKOh79oO5SFDS/ODj3HaU1YGOiTLbZEkQkc4u750p/e8z0fOXa5TzaHfvdTFnNwP3E6WU3zPvugf3UstSp/62kF/eN6BN2UblZexFlpQVZF+7Z7LGNuOPNOfEdV1+6A/tIiuZbeZvLEC0KhNV47ytAvXaV6Y+e09bGc92JJcklUItp2xv12SHDMLGmqwto034p9qcriWnxVdiz3o+mfyer5RuLOdWsoYvU3Ytd0LA2rn5L+SR/VyPgU5yOhG56IbAOt+u1suCu/Qyf4StrJ/sgJrIgrF3CvDlLADS8canDHnyOggm0558JAtFxo32lGRHbgoW/8x3fHc6RfzAGOCnF1+KHIsjNCFn7kuw3wJPLfvm59o/9aHb+CjFb7RIZ+7Vx64pZmFerN9V1248BkHQmQisjzCCTQwaBjAFTSkK4DECTZLF8BAI8yJCk7vJnOI2UVPO5eWKApiCggLUDGvGkIm+26p0wd+ZRChoK/NFyZdAY2149Ru06qboGSwmLcamSaRcWwZWwWRTfs/A1l6MJG1TbyecKCYuL9CUWcJ4t7KtL8pWa4aX+9cO7ZxExkUJvAn81Ua/gL3+ZlirHVkcXc9hwjTBz8Z+96wWSmxz0z4MFH8nI5Ys2I4OAT5tJsDnOQR0GST+3nEy0hn6nC6GedkiN4g2gIxdAX7nYOApK8Q5s+gARhMjUiADpZ1qeXAP5feCCSeccEI+T72FdTK4lucqhrNzM9pt/+9myIEcaTfyTcbIn7HknLZylxfF+r1n4XgaB6UNGIH
al6EuE0xw+fDDD89OhWPr7eZq7drKLejCeLYboh6nxro+dYwtqwJFjMR2Lw/j6MnCk3Wkv/Uxg7fdAgdnx/d0CCPU2BOcbyerFsboE/Ll/DKayGZZ5NLvnCJySnaNafpiEDifa5E9bck5cu+lNIN7sKjRTb9Cxgq596wcIOOVA1AWoughOtYWX7/rdG1ZN4InnBHGP0dANnupA+m8Fse6vVcjCAaNrdzspvpDl/UzFvqBDqcX6Cv6xIIg/Vf0YSsEB4xZASVjzOInfVkWGOld7+qYKOZDdXzpbrYD/UqH1Yuszm+8+hjf5m3JC90CPP1kGQsiej66hI2ipm2ZA8yl2q1V1mQvc4ga3kpmWEQwrxW7SraxBWt6qZcFx3VJliyUs5d9X/q+fNrR7fyw6FzOwy6zyG9RtlUN54ng2oJZZJlNYNepebuX/jbfszXYK178aR6vg3MWwS3MmLO1vX4ThGf3N+fEdV1+2GD0CJvS/eoDfdIMhDdRiqJdOSn+BRuOH2A8u1apQV4osiXJx7hn27IJO2W198pkbRm6iG6i27QjGavLp9DFXqKrb0uZyWI7ehaB2152Ntp5bTyxuV2TTS5wjW5yOhG56Aa7uLkT2NiXhEQXawf+CFmqy6z2it0NfCv2redyHr6B+x8Ek2lPclcWSzyzZLt6ccnCFJnyHirtQLboFD6JuAGfRwY7X6VVzMO9WEho6uiSyNQJQfRybeOZj0631XN/p/EYBEF75qzqtNcyGHoYiRQiY9jEa6KRTSR7kuKWWSHgJzhpQi/1r03ylHcdwBV0ZfyaCARoKVwGjAmCEpYRXWfxug6DjyHTzuh2fU6Nyc4Kf/l7QWbXshJcauGpG9gpc9b923bHEawxcatn3MTqrECroHvZvgXP4/lkfpvg221PZFgwBgWz6+ctpUea3wtiMygFl/QDA6W8kMmKum1lnpMBJchXXi4KE5t7NGGaWBmjtl3qV5MgQ5tB6XtOgMlc8NI5TIAcIf1tOM+2fucQM5CVLWE0cJ70nb8rE7/n90wCpzL4BfEZqyVbxdgQbJetr28YOuSi9EG/bdAKBijnzM4KwWrGWDGOZWUJEjQRbFbrXzsxkgoCxRwD9yHro13AuL43AW9bKYvMNL/nWCrVQ04txFjM0SYyMBjOjD7HktXSLu6tIIOL4yDjRpDdvTMEy7EyqrQhI1Pgxdgqzo8MJwsjtiS2yxIPgiAIgiAIgiAIgiAYJSKwPkuxminYLGOg+YK/YPYS/R4EQRAEQRAEQRAEQRAEU08E1oMgCIIgCIIgCIIgCIIgCIKgD6LGehAEQRAEQRAEQRAEQRAEQRD0QQTWgyAIgiAIgiAIgiAIgiAIgqAPIrAeBEEQBEEQBEEQBEEQBEEQBH0QgfUgCIIgCIIgCIIgCIIgCIIg6IMIrAdBEARBEARBEARBEARBEARBH0RgPQiCIAiCIAiCIAiCIAiCIAj6IALrQRAEQRAEQRAEQRAEQRAEQdAHEVgPgiAIgiAIgiAIgiAIgiAIgj6IwHoQBEEQBEEQBEEQBEEQBEEQ9EEE1oMgCIIgCIIgCIIgCIIgCIIg9c7/AxmIkbuzkPrVAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mnotebook controller is DISPOSED. \n", - "\u001b[1;31mView Jupyter log for further details." - ] - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "from matplotlib.legend_handler import HandlerTuple # Added import\n", - "\n", - "\n", - "# Assuming pivot_df is your original dataframe\n", - "models = pivot_df[\"model_id\"].unique()\n", - "sources = pivot_df[\"source\"].unique()\n", - "\n", - "# Create figure and axis\n", - "plt.style.use(\"seaborn-v0_8-white\")\n", - "fig, ax = plt.subplots(figsize=(15, 6))\n", - "\n", - "# Set the width of each bar group and positions of the bars\n", - "width = 0.15 # width of each bar\n", - "spacing = 0.02 # space between bars within a group\n", - "group_spacing = 0.2 # space between model groups\n", - "\n", - "# Calculate positions for the bars\n", - "num_sources = len(sources)\n", - "total_width_per_group = (width + spacing) * num_sources * 2 # *2 for agent and vanilla\n", - "x = np.arange(len(models)) * (total_width_per_group + group_spacing)\n", - "\n", - "# Plot bars for each source\n", - "for i, source in enumerate(sources):\n", - " source_data = pivot_df[pivot_df[\"source\"] == source]\n", - " agent_scores = [\n", - " source_data[source_data[\"model_id\"] == model][\"code\"].values[0]\n", - " if len(source_data[source_data[\"model_id\"] == model]) > 0\n", - " else np.nan\n", - " for model in models\n", - " ]\n", - " vanilla_scores = [\n", - " source_data[source_data[\"model_id\"] == model][\"vanilla\"].values[0]\n", - " if len(source_data[source_data[\"model_id\"] == model]) > 0\n", - " else np.nan\n", - " for model in models\n", - " ]\n", - "\n", - " # Position calculation for each pair of bars\n", - " pos = x + i * (width * 2 + spacing)\n", - "\n", - " agent_bars = ax.bar(pos, agent_scores, width, 
label=f\"{source} (Agent)\", alpha=0.8)\n", - " vanilla_bars = ax.bar(\n", - " pos + width * 0.6,\n", - " vanilla_scores,\n", - " width,\n", - " hatch=\"////\",\n", - " alpha=0.5,\n", - " hatch_linewidth=2,\n", - " label=f\"{source} (Vanilla)\",\n", - " color=\"white\",\n", - " edgecolor=agent_bars[0].get_facecolor(),\n", - " )\n", - "\n", - "# Customize the plot\n", - "ax.set_ylabel(\"Score\")\n", - "ax.set_title(\"Model Performance Comparison\")\n", - "\n", - "# Set x-axis ticks in the middle of each group\n", - "group_centers = x + (total_width_per_group - spacing) / 2\n", - "ax.set_xticks(group_centers)\n", - "\n", - "# Wrap long model names to prevent overlap\n", - "wrapped_labels = [\"\\n\".join(model.split(\"/\")) for model in models]\n", - "ax.set_xticklabels(wrapped_labels, rotation=0, ha=\"center\")\n", - "\n", - "# Modify legend to combine agent and vanilla entries\n", - "handles, labels = ax.get_legend_handles_labels()\n", - "unique_sources = sources\n", - "legend_elements = [\n", - " (handles[i * 2], handles[i * 2 + 1], labels[i * 2].replace(\" (Agent)\", \"\")) for i in range(len(unique_sources))\n", - "]\n", - "custom_legend = ax.legend(\n", - " [(agent_handle, vanilla_handle) for agent_handle, vanilla_handle, _ in legend_elements],\n", - " [label for _, _, label in legend_elements],\n", - " handler_map={tuple: HandlerTuple(ndivide=None)},\n", - " bbox_to_anchor=(1.05, 1),\n", - " loc=\"upper left\",\n", - ")\n", - "\n", - "ax.yaxis.grid(True, linestyle=\"--\", alpha=0.3)\n", - "ax.set_ylim(bottom=0)\n", - "plt.tight_layout()\n", - "ax.spines[\"top\"].set_visible(False)\n", - "ax.spines[\"right\"].set_visible(False)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'formatted_df' is not defined", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[12], line 45\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m mathjax_table\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# Usage (after running your previous data processing code):\u001b[39;00m\n\u001b[0;32m---> 45\u001b[0m mathjax_table \u001b[38;5;241m=\u001b[39m create_mathjax_table(pivot_df, \u001b[43mformatted_df\u001b[49m)\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mprint\u001b[39m(mathjax_table)\n", - "\u001b[0;31mNameError\u001b[0m: name 'formatted_df' is not defined" - ] - } - ], - "source": [ - "def create_mathjax_table(pivot_df, formatted_df):\n", - " # Start the matrix environment with 4 columns\n", - " # l for left-aligned model and task, c for centered numbers\n", - " mathjax_table = \"\\\\begin{array}{llcc}\\n\"\n", - " mathjax_table += \"\\\\text{Model} & \\\\text{Task} & \\\\text{Agent} & \\\\text{Vanilla} \\\\\\\\\\n\"\n", - " mathjax_table += \"\\\\hline\\n\"\n", - "\n", - " # Sort the DataFrame by model_id and source\n", - " formatted_df = formatted_df.sort_values([\"model_id\", \"source\"])\n", - "\n", - " current_model = None\n", - " for _, row in formatted_df.iterrows():\n", - " model = row[\"model_id\"]\n", - " source = row[\"source\"]\n", - "\n", - " # Add a horizontal line between different models\n", - " if current_model is not None and current_model != model:\n", - " mathjax_table += \"\\\\hline\\n\"\n", - "\n", - " # Format model name\n", - " model_display = model.replace(\"_\", \"\\\\_\")\n", - " if \"Qwen\" in model or \"anthropic\" in model:\n", - " model_display = f\"\\\\textit{{{model_display}}}\"\n", - "\n", - " # If it's the same model as previous row, use empty space\n", - " if current_model == model:\n", - " model_display = \"\\\\;\"\n", - "\n", - " # Add the data row\n", - " mathjax_table += f\"{model_display} & 
{source} & {row['agent']} & {row['vanilla']} \\\\\\\\\\n\"\n", - "\n", - " current_model = model\n", - "\n", - " mathjax_table += \"\\\\hline\\n\"\n", - " mathjax_table += \"\\\\end{array}\"\n", - "\n", - " return mathjax_table\n", - "\n", - "\n", - "# Usage (after running your previous data processing code):\n", - "# mathjax_table = create_mathjax_table(pivot_df, formatted_df)\n", - "# print(mathjax_table)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "test", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/e2b_example.py b/examples/e2b_example.py deleted file mode 100644 index 18354a372..000000000 --- a/examples/e2b_example.py +++ /dev/null @@ -1,53 +0,0 @@ -from dotenv import load_dotenv - -from smolagents import CodeAgent, HfApiModel, Tool -from smolagents.default_tools import VisitWebpageTool - - -load_dotenv() - - -class GetCatImageTool(Tool): - name = "get_cat_image" - description = "Get a cat image" - inputs = {} - output_type = "image" - - def __init__(self): - super().__init__() - self.url = "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png" - - def forward(self): - from io import BytesIO - - import requests - from PIL import Image - - response = requests.get(self.url) - - return Image.open(BytesIO(response.content)) - - -get_cat_image = GetCatImageTool() - -agent = CodeAgent( - tools=[get_cat_image, VisitWebpageTool()], - model=HfApiModel(), - additional_authorized_imports=[ - "Pillow", - "requests", - "markdownify", - ], # "duckduckgo-search", - use_e2b_executor=True, -) - -agent.run( - "Calculate how much is 2+2, then return me an image of a cat. 
Directly use the image provided in your state.", - additional_args={"cat_image": get_cat_image()}, -) # Asking to directly return the image from state tests that additional_args are properly sent to server. - -# Try the agent in a Gradio UI -from smolagents import GradioUI - - -GradioUI(agent).launch() diff --git a/examples/gradio_ui.py b/examples/gradio_ui.py new file mode 100644 index 000000000..81c56a1f2 --- /dev/null +++ b/examples/gradio_ui.py @@ -0,0 +1,25 @@ +from io import BytesIO + +import requests +from PIL import Image + +from smolagents import CodeAgent, GradioUI, InferenceClientModel + + +def add_agent_image(memory_step, agent): + url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" + response = requests.get(url) + memory_step.observations_images = [Image.open(BytesIO(response.content))] + + +agent = CodeAgent( + tools=[], + model=InferenceClientModel(), + verbosity_level=1, + planning_interval=3, + name="example_agent", + description="This is an example agent that has not tool but will always see an agent at the end of its step.", + step_callbacks=[add_agent_image], +) + +GradioUI(agent, file_upload_folder="./data").launch() diff --git a/examples/gradio_upload.py b/examples/gradio_upload.py deleted file mode 100644 index 746013627..000000000 --- a/examples/gradio_upload.py +++ /dev/null @@ -1,6 +0,0 @@ -from smolagents import CodeAgent, GradioUI, HfApiModel - - -agent = CodeAgent(tools=[], model=HfApiModel(), max_steps=4, verbosity_level=1) - -GradioUI(agent, file_upload_folder="./data").launch() diff --git a/examples/inspect_multiagent_run.py b/examples/inspect_multiagent_run.py index 8c1c98d46..95032cd34 100644 --- a/examples/inspect_multiagent_run.py +++ b/examples/inspect_multiagent_run.py @@ -9,14 +9,14 @@ from smolagents import ( CodeAgent, DuckDuckGoSearchTool, - HfApiModel, + InferenceClientModel, ToolCallingAgent, VisitWebpageTool, ) # Then we run the agentic part! 
-model = HfApiModel() +model = InferenceClientModel() search_agent = ToolCallingAgent( tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], diff --git a/examples/multi_llm_agent.py b/examples/multi_llm_agent.py new file mode 100644 index 000000000..186fa06f8 --- /dev/null +++ b/examples/multi_llm_agent.py @@ -0,0 +1,47 @@ +import os + +from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel + + +os.environ["OPENAI_API_KEY"] = "" +os.environ["AWS_ACCESS_KEY_ID"] = "" +os.environ["AWS_SECRET_ACCESS_KEY"] = "" +os.environ["AWS_REGION"] = "" + +llm_loadbalancer_model_list = [ + { + "model_name": "model-group-1", + "litellm_params": { + "model": "gpt-4o-mini", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + }, + { + "model_name": "model-group-1", + "litellm_params": { + "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), + "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), + "aws_region_name": os.getenv("AWS_REGION"), + }, + }, + # { + # "model_name": "model-group-2", + # "litellm_params": { + # "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + # "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), + # "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), + # "aws_region_name": os.getenv("AWS_REGION"), + # }, + # }, +] + + +model = LiteLLMRouterModel( + model_id="model-group-1", + model_list=llm_loadbalancer_model_list, + client_kwargs={"routing_strategy": "simple-shuffle"}, +) +agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) + +agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") diff --git a/examples/multiple_tools.py b/examples/multiple_tools.py index 39ed90767..a2685541f 100644 --- a/examples/multiple_tools.py +++ b/examples/multiple_tools.py @@ -1,13 +1,11 @@ -from typing import Optional - import requests # from smolagents.agents import ToolCallingAgent -from smolagents import CodeAgent, 
HfApiModel, tool +from smolagents import CodeAgent, InferenceClientModel, tool # Choose which LLM engine to use! -model = HfApiModel() +model = InferenceClientModel() # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct") # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620' @@ -15,7 +13,7 @@ @tool -def get_weather(location: str, celsius: Optional[bool] = False) -> str: +def get_weather(location: str, celsius: bool | None = False) -> str: """ Get the current weather at the given location using the WeatherStack API. diff --git a/examples/open_deep_research/README.md b/examples/open_deep_research/README.md index 915bfc894..c2c799616 100644 --- a/examples/open_deep_research/README.md +++ b/examples/open_deep_research/README.md @@ -1,22 +1,54 @@ # Open Deep Research -Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)! +Welcome to this open replication of [OpenAI's Deep Research](https://openai.com/index/introducing-deep-research/)! This agent attempts to replicate OpenAI's model and achieve similar performance on research tasks. -Read more about this implementation's goal and methods [in our blog post](https://huggingface.co/blog/open-deep-research). +Read more about this implementation's goal and methods in our [blog post](https://huggingface.co/blog/open-deep-research). -This agent achieves 55% pass@1 on GAIA validation set, vs 67% for Deep Research. -To install it, first run +This agent achieves **55% pass@1** on the GAIA validation set, compared to **67%** for the original Deep Research. 
+ +## Setup + +To get started, follow the steps below: + +### Clone the repository + +```bash +git clone https://github.com/huggingface/smolagents.git +cd smolagents/examples/open_deep_research +``` + +### Install dependencies + +Run the following command to install the required dependencies from the `requirements.txt` file: + ```bash pip install -r requirements.txt ``` -And install smolagents dev version +### Install the development version of `smolagents` + ```bash -pip install smolagents[dev] +pip install -e ../../.[dev] ``` +### Set up environment variables + +The agent uses the `GoogleSearchTool` for web search, which requires an environment variable with the corresponding API key, based on the selected provider: +- `SERPAPI_API_KEY` for SerpApi: [Sign up here to get a key](https://serpapi.com/users/sign_up) +- `SERPER_API_KEY` for Serper: [Sign up here to get a key](https://serper.dev/signup) + +Depending on the model you want to use, you may need to set environment variables. +For example, to use the default `o1` model, you need to set the `OPENAI_API_KEY` environment variable. +[Sign up here to get a key](https://platform.openai.com/signup). + +> [!WARNING] +> The use of the default `o1` model is restricted to tier-3 access: https://help.openai.com/en/articles/10362446-api-access-to-o1-and-o3-mini + + +## Usage + Then you're good to go! Run the run.py script, as in: ```bash python run.py --model-id "o1" "Your question here!" 
-``` +``` \ No newline at end of file diff --git a/examples/open_deep_research/analysis.ipynb b/examples/open_deep_research/analysis.ipynb index 04f315fdd..ccb6a1d54 100644 --- a/examples/open_deep_research/analysis.ipynb +++ b/examples/open_deep_research/analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -11,19 +11,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/aymeric/venv/gaia/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "\n", @@ -38,12 +28,12 @@ "\n", "pd.set_option(\"max_colwidth\", None)\n", "\n", - "OUTPUT_DIR = \"output\"" + "OUTPUT_DIR = \"../../output\"" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -52,29 +42,6 @@ "eval_df = pd.DataFrame(eval_ds)" ] }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2 86\n", - "1 53\n", - "3 26\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.Series(eval_ds[\"task\"]).value_counts()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -84,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -98,254 +65,14 @@ " results.append(df)\n", "\n", "result_df = 
pd.concat(results)\n", - "result_df = result_df.drop(columns=[\"start_time\", \"end_time\"])\n", "result_df[\"prediction\"] = result_df[\"prediction\"].fillna(\"No prediction\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "String cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 2 High fantasy A Song of Ice and Fire cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 94 CFM for Cheater cannot be normalized to number str.\n", - "String 93 CFM for Cheater beater cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction 
cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 3 or 4 cannot be normalized to number str.\n", - "String No year cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 
Unable to determine cannot be normalized to number str.\n", - "String 250 for Cheater cannot be normalized to number str.\n", - "String 220 for Cheater beater cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 776 ft/min for Cheater cannot be normalized to number str.\n", - "String 768 ft/min for Cheater beater cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String CFM number for Cheater: not listed cannot be normalized to number str.\n", - "String CFM number for Cheater beater: 665 ft/min cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 1.46 ร… cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - 
"String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String August 1: 0 August 2: 0 August 3: 0 August 4: 0 August 5: 0 August 6: 0 August 7: 0 August 8: 0 August 9: 0 August 10: 0 August 11: 0 August 12: 0 August 13: 0 August 14: 0 August 15: 0 August 16: 0 August 17: 0 August 18: 0 August 19: 0 August 20: 0 August 21: 
0 August 22: 0 August 23: 0 August 24: 0 August 25: 0 August 26: 0 August 27: 0 August 28: 0 August 29: 0 August 30: 0 August 31: 0 cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 120 for Cheater cannot be normalized to number str.\n", - "String 103 for Cheater beater cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 120.28 for Cheater cannot be normalized to number str.\n", - "String 119.04 for Cheater beater cannot be normalized to number str.\n", - "String 3 or 4 cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive sold for 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 2730-2740 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 89706.00 USD cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String No prediction cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to 
number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 6 The Lord of the Rings (book) J. R. R. Tolkien Author American literature Fantasy literature Publishers A Song of Ice and Fire cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 1.46 ร… cannot be normalized to number str.\n", - "String cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 94.5 for Cheater cannot be normalized to number str.\n", - "String 93.5 for Cheater beater cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 776 for Cheater cannot be normalized to number str.\n", - "String Not specified for Cheater Beater cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine 
cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 5.75 for Cheater cannot be normalized to number str.\n", - "String 5.22 for Cheater Beater cannot be normalized to number str.\n", - "String 2017 Komo Mai Drive sold for 900000 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized 
to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String 33101 28557 cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "String Unable to determine cannot be normalized to number str.\n", - "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n", - "Close call: Rockhopper Penguins vs Rockhopper penguin\n", - "Close call: INT. THE CASTLE vs THE CASTLE\n", - "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n", - "Close call: The World of the Twenty First Century 1994 vs The World of the Twenty First Century\n", - "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n", - "Close call: Wes Craven's A Nightmare on Elm Street vs A Nightmare on Elm Street\n", - "Close call: God said let there be dragons vs Here be dragons\n", - "Close call: rockhopper penguins vs Rockhopper penguin\n", - "Close call: Harbinger, This Fire, Tidal vs Harbinger, Tidal\n", - "Close call: EC 3.1.3.1;EC 1.11.1.7 vs 3.1.3.1; 1.11.1.7\n", - "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by 
opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n", - "Close call: Alfonso Cardinal Visconti vs Alfonso Visconti\n", - "Close call: to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n", - "Close call: Out of the Silent Planet by C.S. Lewis vs Out of the Silent Planet\n", - "Close call: broccoli, celery, fresh basil, green beans, lettuce, sweet potatoes vs broccoli, celery, fresh basil, lettuce, sweet potatoes\n", - "Close call: To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles and by opposing end them vs To be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/aymeric/Documents/Code/smolagents/examples/open_deep_research/scripts/gaia_scorer.py:52: UserWarning: Answer lists have different lengths, returning False.\n", - " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n" - ] - } - ], + "outputs": [], "source": [ "import re\n", "from collections import Counter\n", @@ -395,12 +122,21 @@ " return total_count\n", "\n", "\n", + "def get_durations(row):\n", + " # start_datetime = datetime.strptime(row['start_time'], \"%Y-%m-%d %H:%M:%S\")\n", + " # end_datetime = datetime.strptime(row['end_time'], \"%Y-%m-%d %H:%M:%S\")\n", + "\n", + " duration_timedelta = row[\"end_time\"] - row[\"start_time\"]\n", + " return int(duration_timedelta.total_seconds())\n", + "\n", + "\n", + "result_df[\"duration\"] = 
result_df.apply(get_durations, axis=1)\n", "# result_df[\"tool_calls\"] = result_df[\"intermediate_steps\"].apply(sum_tool_calls)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -425,43 +161,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "agent_name\n", - "code_gpt4o_03_february_text 165\n", - "code_o1_03_february_ablation-toolcalling-manager 165\n", - "code_o1_01_february_text 165\n", - "code_o3-mini_03_february_remove-navigational 165\n", - "code_o1_04_february_submission5 165\n", - "code_o1_03_february_text_high-reasoning-effort 165\n", - "code_o1_03_february_remove-navigational 164\n", - "code_o1_03_february_fix-print-outputs 164\n", - "code_o1_04_february_submission 162\n", - "code_o1_03_february_goodoldtext-unbroken 161\n", - "code_gpt4o_03_february_goodoldtext-unbroken 159\n", - "code_gpt4o_03_february_magenticbrowser 159\n", - "code_o1_03_february_fix-print-outputs2 156\n", - "code_gpt4o_03_february_magenticbrowser2 156\n", - "code_o1_04_february_submission-medium 125\n", - "code_o1_29-01_text 105\n", - "code_llama-3 90\n", - "code_o1_22-01_managedagent-summary_planning 67\n", - "code_o1_25-01_visioon 53\n", - "code_o1_04_february_submission3 49\n", - "code_qwen-coder-32B_03_february_text 43\n", - "code_o1_04_february_submission4 6\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "result_df[\"agent_name\"].value_counts()" ] @@ -475,440 +177,37 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "agent_name\n", - "code_gpt4o_03_february_text 165\n", - "code_o1_03_february_ablation-toolcalling-manager 165\n", - "code_o1_01_february_text 165\n", - 
"code_o3-mini_03_february_remove-navigational 165\n", - "code_o1_04_february_submission5 165\n", - "code_o1_03_february_text_high-reasoning-effort 165\n", - "code_o1_03_february_remove-navigational 164\n", - "code_o1_03_february_fix-print-outputs 164\n", - "code_o1_04_february_submission 162\n", - "code_o1_03_february_goodoldtext-unbroken 161\n", - "code_gpt4o_03_february_goodoldtext-unbroken 159\n", - "code_gpt4o_03_february_magenticbrowser 159\n", - "code_o1_03_february_fix-print-outputs2 156\n", - "code_gpt4o_03_february_magenticbrowser2 156\n", - "code_o1_04_february_submission-medium 125\n", - "code_o1_29-01_text 105\n", - "code_llama-3 90\n", - "code_o1_22-01_managedagent-summary_planning 67\n", - "code_o1_25-01_visioon 53\n", - "code_o1_04_february_submission3 49\n", - "code_qwen-coder-32B_03_february_text 43\n", - "code_o1_04_february_submission4 6\n", - "Name: count, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "agent_name task\n", - "code_gpt4o_03_february_goodoldtext-unbroken 2 84\n", - " 1 53\n", - " 3 22\n", - "code_gpt4o_03_february_magenticbrowser 2 83\n", - " 1 52\n", - " ..\n", - "code_o3-mini_03_february_remove-navigational 1 53\n", - " 3 26\n", - "code_qwen-coder-32B_03_february_text 2 22\n", - " 1 14\n", - " 3 7\n", - "Name: count, Length: 65, dtype: int64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total length: 2809 - is complete: False\n" - ] - } - ], + "outputs": [], "source": [ - "o1_vision = \"code_o1_25-01_visioon\"\n", - "o1_next = \"code_o1_29-01_text\"\n", - "o1 = \"code_o1_01_february_text\"\n", - "\n", - "list_versions = [o1, o1_vision, o1_next]\n", - "\n", - "# submission_selection_name = \"react_code_llama3-70b_02-05_full-gaia-validation-code\"\n", "sel_df = result_df\n", "# sel_df = sel_df.loc[\n", "# (result_df[\"agent_name\"].isin(list_versions))\n", - "# # & 
(~result_df[\"question\"].isin(UNSOLVED_QUESTIONS))\n", "# ]\n", "sel_df = sel_df.reset_index(drop=True)\n", "display(sel_df[\"agent_name\"].value_counts())\n", "sel_df = sel_df.drop_duplicates(subset=[\"agent_name\", \"question\"])\n", "display(sel_df.groupby(\"agent_name\")[[\"task\"]].value_counts())\n", - "print(\"Total length:\", len(sel_df), \"- is complete:\", len(sel_df) == 165)\n", - "# assert sel_df[\"question\"].value_counts().max() == len(list_versions), \"Some questions are duplicate!\"" + "print(\"Total length:\", len(sel_df), \"- is complete:\", len(sel_df) == 165)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Average score:'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_correct
agent_name
code_gpt4o_03_february_goodoldtext-unbroken0.384
code_gpt4o_03_february_magenticbrowser0.352
code_gpt4o_03_february_magenticbrowser20.365
code_gpt4o_03_february_text0.376
code_llama-30.078
code_o1_01_february_text0.491
code_o1_03_february_ablation-toolcalling-manager0.327
code_o1_03_february_fix-print-outputs0.518
code_o1_03_february_fix-print-outputs20.558
code_o1_03_february_goodoldtext-unbroken0.534
code_o1_03_february_remove-navigational0.537
code_o1_03_february_text_high-reasoning-effort0.485
code_o1_04_february_submission0.494
code_o1_04_february_submission-medium0.488
code_o1_04_february_submission30.490
code_o1_04_february_submission40.500
code_o1_04_february_submission50.552
code_o1_22-01_managedagent-summary_planning0.418
code_o1_25-01_visioon0.340
code_o1_29-01_text0.390
code_o3-mini_03_february_remove-navigational0.291
code_qwen-coder-32B_03_february_text0.209
\n", - "
" - ], - "text/plain": [ - " is_correct\n", - "agent_name \n", - "code_gpt4o_03_february_goodoldtext-unbroken 0.384\n", - "code_gpt4o_03_february_magenticbrowser 0.352\n", - "code_gpt4o_03_february_magenticbrowser2 0.365\n", - "code_gpt4o_03_february_text 0.376\n", - "code_llama-3 0.078\n", - "code_o1_01_february_text 0.491\n", - "code_o1_03_february_ablation-toolcalling-manager 0.327\n", - "code_o1_03_february_fix-print-outputs 0.518\n", - "code_o1_03_february_fix-print-outputs2 0.558\n", - "code_o1_03_february_goodoldtext-unbroken 0.534\n", - "code_o1_03_february_remove-navigational 0.537\n", - "code_o1_03_february_text_high-reasoning-effort 0.485\n", - "code_o1_04_february_submission 0.494\n", - "code_o1_04_february_submission-medium 0.488\n", - "code_o1_04_february_submission3 0.490\n", - "code_o1_04_february_submission4 0.500\n", - "code_o1_04_february_submission5 0.552\n", - "code_o1_22-01_managedagent-summary_planning 0.418\n", - "code_o1_25-01_visioon 0.340\n", - "code_o1_29-01_text 0.390\n", - "code_o3-mini_03_february_remove-navigational 0.291\n", - "code_qwen-coder-32B_03_february_text 0.209" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_correctis_near_correctcount_stepscount
agent_nametask
code_gpt4o_03_february_goodoldtext-unbroken10.4528300.4528307.00000053
20.3809520.3928578.51190584
30.2272730.22727310.40909122
code_gpt4o_03_february_magenticbrowser10.4807690.4807697.15384652
20.3493980.3614468.16867583
..................
code_o3-mini_03_february_remove-navigational20.2325580.2441864.97674486
30.1538460.1538466.61538526
code_qwen-coder-32B_03_february_text10.3571430.3571435.42857114
20.1363640.1363646.40909122
30.1428570.1428576.5714297
\n", - "

65 rows ร— 4 columns

\n", - "
" - ], - "text/plain": [ - " is_correct \\\n", - "agent_name task \n", - "code_gpt4o_03_february_goodoldtext-unbroken 1 0.452830 \n", - " 2 0.380952 \n", - " 3 0.227273 \n", - "code_gpt4o_03_february_magenticbrowser 1 0.480769 \n", - " 2 0.349398 \n", - "... ... \n", - "code_o3-mini_03_february_remove-navigational 2 0.232558 \n", - " 3 0.153846 \n", - "code_qwen-coder-32B_03_february_text 1 0.357143 \n", - " 2 0.136364 \n", - " 3 0.142857 \n", - "\n", - " is_near_correct \\\n", - "agent_name task \n", - "code_gpt4o_03_february_goodoldtext-unbroken 1 0.452830 \n", - " 2 0.392857 \n", - " 3 0.227273 \n", - "code_gpt4o_03_february_magenticbrowser 1 0.480769 \n", - " 2 0.361446 \n", - "... ... \n", - "code_o3-mini_03_february_remove-navigational 2 0.244186 \n", - " 3 0.153846 \n", - "code_qwen-coder-32B_03_february_text 1 0.357143 \n", - " 2 0.136364 \n", - " 3 0.142857 \n", - "\n", - " count_steps count \n", - "agent_name task \n", - "code_gpt4o_03_february_goodoldtext-unbroken 1 7.000000 53 \n", - " 2 8.511905 84 \n", - " 3 10.409091 22 \n", - "code_gpt4o_03_february_magenticbrowser 1 7.153846 52 \n", - " 2 8.168675 83 \n", - "... ... ... 
\n", - "code_o3-mini_03_february_remove-navigational 2 4.976744 86 \n", - " 3 6.615385 26 \n", - "code_qwen-coder-32B_03_february_text 1 5.428571 14 \n", - " 2 6.409091 22 \n", - " 3 6.571429 7 \n", - "\n", - "[65 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(\"Average score:\", sel_df.groupby(\"agent_name\")[[\"is_correct\"]].mean().round(3))\n", "display(\n", - " sel_df.groupby([\"agent_name\", \"task\"])[[\"is_correct\", \"is_near_correct\", \"count_steps\", \"question\"]]\n", + " sel_df.groupby([\"agent_name\", \"task\"])[[\"is_correct\", \"is_near_correct\", \"count_steps\", \"question\", \"duration\"]]\n", " .agg(\n", " {\n", " \"is_correct\": \"mean\",\n", " \"is_near_correct\": \"mean\",\n", " \"count_steps\": \"mean\",\n", " \"question\": \"count\",\n", + " \"duration\": \"mean\",\n", " }\n", " )\n", " .rename(columns={\"question\": \"count\"})\n", @@ -917,9851 +216,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "customdata": [ - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - 
], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "When you take the average of the standard populati" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "It is 1999. 
Before you party like it is 1999, plea" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "You are given this Excel file as a map. 
You start " - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "What is the last word before the second chorus of " - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "What is the latest chronological year date written" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "Of the cities within the United States where U.S. " - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "As of August 2023, who is the only winner of the U" - ] - ], - "hovertemplate": "agent_name=code_gpt4o_03_february_goodoldtext-unbroken
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_gpt4o_03_february_goodoldtext-unbroken", - "line": { - "color": "#636efa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_gpt4o_03_february_goodoldtext-unbroken", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4A", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoP1VVVVVVVeU/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkPzMzMzMzM+M/AAAAAAAA4j/T0tLS0tLiP3Icx3Ecx+E/bCivobyG4j+amZmZmZnhP5IkSZIkSeI/dNFFF1104T8LWchCFrLgP1VVVVVVVeE/7FG4HoXr4T+xEzuxEzvhP3sJ7SW0l+A/AAAAAAAA4D/d0wjLPY3gPxEREREREeE/hBBCCCGE4D8AAAAAAADgPwgffPDBB98/AAAAAAAA4D9f8RVf8RXfP47jOI7jON4/fdYNpshn3T8bymsor6HcP1y+5Vu+5ds/zczMzMzM3D8ZnI/B+RjcPz3P8zzP89w/EnfEHXFH3D+jiy666KLbPxzHcRzHcdw/05ve9KY33T94Nuo7G/XdP1VVVVVVVd0/L6fg5RS83D9xPQrXo3DdP93c3Nzc3Nw/7MRO7MRO3D8iNcF4K/vcPxPaS2gvod0/F1100UUX3T8lSZIkSZLcPx/BfQT3Edw/GmG5pxGW2z91Xx5bETTcP7y7u7u7u9s/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z9bqZVaqZXaPyebbLLJJts/NSbSA5Wz2z88PDw8PDzcP8y1A3PtwNw/fMVXfMVX3D8LmwOJVtjcPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbP08b6LSBTts/KK+hvIby2j++Y2pg75jaPxqkQRqkQdo/2TMQlY7s2T+amZmZmZnZP+Dp1vywSNk/+hicj8H52D+q82sPuazYP0mSJEmSJNk/2djY2NjY2D9T1pQ1ZU3ZPzv0m61Dv9k/L7rooosu2j+e8YxnPOPZP5qZmZmZmdk/WqAFWqAF2j+c3vSmN73ZP3bZZZdddtk/Z6O+s1Hf2T+amZmZmZnZPwAAAAAAANo/Grab5Ulk2j+IxvrQWB/aPywFav1Kgdo/PQrXo3A92j8ZvhEFJp3aP/v6+vr6+to/G0PTHey32j87sRM7sRPbP9u2bdu2bds/ln0OqQnG2z8T6J26loPbPya0l9BeQts/JrBpP1kC2z/D2jesfcPaP5ax/Y5eGds/
27Zt27Zt2z80+bJBky/bPyivobyG8to/q8FzBIq22j8+jbDc0wjbP5u1WZu1Wds/BA0ndV8e2z+bCOSaCOTaPzMzMzMzM9s/hYn3I6f52j+f4pIhWEfbPw8b6bCRDts/W2uttdZa2z/ZzvdT46XbP/y+7/u+79s/7na73W632z8AAAAAAADcP/KGvCFvyNs/HLmRG7mR2z8j+oDq2FvbPyebbLLJJts/27Zt27Zt2z9YYyI9UDnbP1uwBVuwBds/09LS0tLS2j/TVwljs6DaP6c3velNb9o/D+jGPH202j87qIM6qIPaP2le/ImEU9o/gkQrbA4k2j9r/N08QvXZP3Icx3Ecx9k/mpmZmZmZ2T/Lli1btmzZP2x21CLkr9k/I591gyny2T9SkPx5lcXZP5qZmZmZmdk/y7hl3DJu2T82lNdQXkPZP9ouhNkuhNk/EWflJ8RZ2T8wWf6S5S/ZP2mQBmmQBtk/fo/ICcLd2D9rcRPmd7XYP3bpMX+vjdg/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "When you take the average of the standard populati" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - 
"What two-word type of model did Manash Pratim Kash" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "According to github, when was Regression added to " - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "This is a secret message my friend gave me. 
It say" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "What is the latest chronological year date written" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Of the cities within the United States where U.S. " - ] - ], - "hovertemplate": "agent_name=code_gpt4o_03_february_magenticbrowser
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_gpt4o_03_february_magenticbrowser", - "line": { - "color": "#EF553B", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_gpt4o_03_february_magenticbrowser", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4A", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACamZmZmZnJP1VVVVVVVcU/kiRJkiRJwj8AAAAAAADAPxzHcRzHcbw/mpmZmZmZyT900UUXXXTRPwAAAAAAANA/FDuxEzux0z+3bdu2bdvWP1VVVVVVVdU/AAAAAAAA1D/T0tLS0tLSP3Icx3Ecx9E/eQ3lNZTX0D8AAAAAAADQP5IkSZIkSdI/dNFFF1100T84velNb3rTP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j8Jyz2NsNzTPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADUP1VVVVVVVdU/tbS0tLS01D/UQR3UQR3UP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP1VVVVVVVdU/ZmZmZmZm1j/blahdidrVP7dt27Zt29Y/lTVlTVlT1j9GF1100UXXPxdswRZswdY/etOb3vSm1z9dQUyuICbXPwAAAAAAANg/4eUUvJyC1z8K16NwPQrXP9jX19fX19c/J3ZiJ3Zi1z9ln0NqgvHWP0xoL6G9hNY/RhdddNFF1z+3bdu2bdvWP0xnMZ3FdNY/fBphuacR1j/QcFL35bHVP1VVVVVVVdU/yRCso8371D+ttdZaa63VP1VVVVVVVdU/AAAAAAAA1T/VSq3USq3UP1VVVVVVVdU/0gOVs1v41T+mpaWlpaXVP1VVVVVVVdU/Fl/xFV/x1T8g0QqbA4nWP47jOI7jONY/r169evXq1T/JZ91ginzWPwrXo3A9Ctc/ymsor6G81j8oxFn5CXHWP3ZiJ3ZiJ9Y/Xi1uwvyu1j9mZmZmZmbWP6QMPN2aH9Y/25WoXYna1T80dX7tIZfVP1VVVVVVVdU/FRUVFRUV1T82ZU1ZU9bUPy+QSfECmdQ/XXTRRRdd1D9CEYpQhCLUP5Q+6ZM+6dM/VEZlVEZl1D9DFrKQhSzUP6WUUkoppdQ/Ut/ZqO9s1D8mTv2eW+LUP1VVVVVVVdU/1g86KvDF1T9jfWisD43VP1DrVwrU+tU/w/UoXI/C1T+bB7nrZ4vVP/b19fX19dU/2xia7mC/1T+e2Imd2InVP1VVVVVVVdU/2eeQmmC81T/XcnCzX4jVP9FeQnsJ7dU//mQJbNpP1j8c1r5h7RvWP49eGdvv6NU/btu2bdu21T
96amGlpxbWP0xnMZ3FdNY/bTV4jkDR1j9Y7mmE5Z7WP9ZmbdZmbdY/QcNJ3ZfH1j/XRCDXRCDXP3d3d3d3d9c/RhdddNFF1z8RrKPN+xTXP+UWT27x5NY/Ouecc8451z8K16NwPQrXP9d1Xdd1Xdc/7PV6vV6v1z8AAAAAAIDXP/QFfUFf0Nc/GHqhF3qh1z/f2jDNXfDXP8IHH3zwwdc/9oDZA2YP2D9JD1TObuHXP0J7Ce0ltNc/iIeHh4eH1z82C6o9J9PXP4K5dmCuHdg/6qPVJETx1z+ogzqogzrYP2C3x1qGDtg/Zfx2qSfj1z/MknJAZLjXP+Q4juM4jtc/J0p2baJk1z+6c+fOnTvXP+HlFLycgtc/n3WDKfJZ1z99GzBU0zHXP3d3d3d3d9c/uj5dn65P1z+H8hrKayjXP1esAVesAdc/t23btm3b1j+21lprrbXWPwdpkAZpkNY/dRhlKp5r1j9eLW7C/K7WP+EMCCV3itY/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "When you take the average of the standard populati" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "According to github, when was Regression added to " - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "It is 1999. 
Before you party like it is 1999, plea" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "You are Van Helsing, a 
renowned vampire hunter. A " - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What is the latest chronological year date written" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ 
- "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "What was the actual enrollment count of the clinic" - ] - ], - "hovertemplate": "agent_name=code_gpt4o_03_february_magenticbrowser2
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_gpt4o_03_february_magenticbrowser2", - "line": { - "color": "#00cc96", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_gpt4o_03_february_magenticbrowser2", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T+SJEmSJEniPxEREREREeE/AAAAAAAA4D8eHh4eHh7ePwAAAAAAAOA/DeU1lNdQ3j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP1VVVVVVVd0/KVyPwvUo3D87sRM7sRPbPy+hvYT2Eto/27Zt27Zt2z9huacRlnvaP5qZmZmZmdk/11prrbXW2j8AAAAAAADcPxdddNFFF90/PDw8PDw83D/btm3btm3bP6uqqqqqqto/0LrBFPms2z8or6G8hvLaPxqkQRqkQdo/mpmZmZmZ2T+J2pWoXYnaP9u2bdu2bds/EnfEHXFH3D8XXXTRRRfdPxzHcRzHcdw/velNb3rT2z9yBTG5gpjcP1VVVVVVVd0/g5dT8HIK3j9xPQrXo3DdP93c3Nzc3Nw/7MRO7MRO3D8iNcF4K/vcPxzHcRzHcdw/7RvWvmHt2z8lSZIkSZLcPx/BfQT3Edw/1AjLPY2w3D91Xx5bETTcP83MzMzMzNw/532KS4Zg3T/nnHPOOefcP13XdV3Xdd0/AAAAAAAA3T/dyI3cyI3cPxdddNFFF90/rDGRHqic3T8tLS0tLS3dP8y1A3PtwNw/fMVXfMVX3D8yfrvUk/HbPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbP08b6LSBTts/KK+hvIby2j/btm3btm3bP1y+5Vu+5ds/FzdhfleL2z8zMzMzMzPbP35YpAw83do/idqVqF2J2j/ksmKghDfaP3qe53me59k/mpmZmZmZ2T9T1pQ1ZU3ZPxUvkEnxAtk/dNFFF1102T+TlaxkJSvZP5qZmZmZmdk/GZVRGZVR2T+RhSxkIQvZP3bZZZdddtk/5QpicgUx2T+amZmZmZnZP1VVVVVVVdk/mYbtZnkS2T801ofG+tDYPzbZZJNNNtk/9ihcj8L12D+qeZC7frbYPxkZGRkZGdk/i/gEUsl52T+xEzuxEzvZP5qZmZmZmdk/fg6pCcZb2T/7hVhRGh/ZPzmO4ziO49g/koq51Rmp2D9wWPuGtW/YP6+M7Xf0ytg/JUmSJEmS2D/pqYWV
nlrYPzqL6Syms9g/iHG/Lql82D/LPY2w3NPYP9mJndiJndg/6r48tiJo2D9YoTNYoTPYPwAAAAAAANg/Kky8HznN1z/jkiFYR5vXP2pXonYlatc/vvfee++91z+q8dJNYhDYP/h93/d939c/DAaDwWAw2D8AAAAAAADYPxT2hD1hT9g/+IEf+IEf2D/pA6pjb23YPz744IMPPtg/qYilIpaK2D8m0gOVs1vYP9iCLdiCLdg/AAAAAAAA2D9Q7TmZvkrYP4mfUeJnlNg/TGV71wHd2D/5iq/4iq/YP2JyBTG5gtg/0QqbA4lW2D/ZiZ3YiZ3YPzmO4ziO49g/0nmLIZ232D/EiBEjRozYPzTWh8b60Ng/YYp81g2m2D/oVRZntHvYP1K4HoXrUdg/waJgUbAo2D8AAAAAAADYP9jX19fX19c/1cDeMTWw1z+JV5F4FYnXPyd2Yid2Ytc/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "When you take the average of the standard populati" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "The object in the British 
Museum's collection with" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "According to github, when was Regression added to " - ], - [ - "This is a secret message my friend gave me. 
It say" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "What is the latest chronological year date written" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "In the 2015 Metropolitan 
Museum of Art exhibition " - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "Of the cities within the United States where U.S. " - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "According to the USGS, in what year was the Americ" - ] - ], - "hovertemplate": "agent_name=code_gpt4o_03_february_text
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_gpt4o_03_february_text", - "line": { - "color": "#ab63fa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_gpt4o_03_february_text", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgP97d3d3d3d0/AAAAAAAA3D9aWlpaWlraPzmO4ziO49g/KK+hvIby2j+amZmZmZnZP9u2bdu2bds/L7rooosu2j+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T/ZiZ3YiZ3YPy+hvYT2Eto/27Zt27Zt2z/UCMs9jbDcP7y7u7u7u9s/55xzzjnn3D8AAAAAAADcPxdddNFFF90/PDw8PDw83D8d1EEd1EHdP47jOI7jON4/KvJZN5gi3z8N5TWU11DeP9/yLd/yLd8/AAAAAAAA4D84H4PzMTjfPwAAAAAAAOA/0Bf0BX1B3z8AAAAAAADgP5/0SZ/0Sd8/6k1vetOb3j94Nuo7G/XdP1VVVVVVVd0/L6fg5RS83D8pXI/C9SjcP93c3Nzc3Nw/7MRO7MRO3D+WfQ6pCcbbPya0l9BeQts/7RvWvmHt2z8lSZIkSZLcPx/BfQT3Edw/1AjLPY2w3D91Xx5bETTcP7y7u7u7u9s/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z8cuZEbuZHbPx988MEHH9w/NSbSA5Wz2z9LS0tLS0vbP73pTW9609s/27Zt27Zt2z8yfrvUk/HbP+Q4juM4jts/2bJly5Yt2z/QusEU+azbP08b6LSBTts/KK+hvIby2j++Y2pg75jaPxqkQRqkQdo/2TMQlY7s2T9mZmZmZmbaPy+hvYT2Eto/idqVqF2J2j+CEt5o6vzaP6uqqqqqqto/WlpaWlpa2j+zpqwpa8raP2G5pxGWe9o/L7rooosu2j+e8YxnPOPZP/qkT/qkT9o/WqAFWqAF2j+c3vSmN73ZPyeaaKKJJto/Z6O+s1Hf2T+amZmZmZnZPwAAAAAAANo/Wp5EpmG72T+IxvrQWB/aPzFvZ0jM29k/mpmZmZmZ2T96kLt+tljZP7q5ubm5udk/i/gEUsl52T+KndiJndjZP5qZmZmZmdk/fg6pCcZb2T+B3qlrObjZP7SX0F5Ce9k/XJ2RirnV2T+amZmZmZnZP+mVsf2OXtk/btu2bdu22T+hyZcN
mnzZPzGdxXQW09k/mpmZmZmZ2T+oEZZ7GmHZP1qbtVmbtdk/lLovj60I2j8arNAZrNDZPyIiIiIiIto/vB85zdfq2T/8FJcMwTraP4nalahdido/U0oppZRS2j/pJjEIrBzaP3qe53me59k/bTabzWaz2T8AAAAAAIDZP3PGnDFnzNk/mpmZmZmZ2T8GfxUnpOTZP7LJJptsstk/wp8Jfyb82T+/GhPpgcrZP5qZmZmZmdk/aWlpaWlp2T+fk+mrhLHZP6BR4meU+Nk/rSYhir/I2T+amZmZmZnZP2bogN0ea9k/FjYHEq2w2T/lgMhwr4LZP3Icx3Ecx9k/famg1ZcK2j/SpEmTJk3aP4jG+tBYH9o/DqbIZ91g2j8h+fMqizPaPwc6baDTBto/z2pntbPa2T/zGsprKK/ZP9ouhNkuhNk/EWflJ8RZ2T8wWf6S5S/ZP2mQBmmQBtk/fo/ICcLd2D9rcRPmd7XYP3bpMX+vjdg/ZmZmZmZm2D+vUkzQXaXYP5Ey8HRrftg/GFuCb/NX2D8yOB+D8zHYPwyYxoBpDNg/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "When you take the average of the standard populati" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "If there is anything that doesn't make sense in th" 
- ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "What is the area of the green polygon in the attac" - ] - ], - "hovertemplate": "agent_name=code_llama-3
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_llama-3", - "line": { - "color": "#FFA15A", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_llama-3", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZ", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEREREREbE/AAAAAAAAsD8eHh4eHh6uPxzHcRzHcaw/KK+hvIbyqj+amZmZmZmpPxiGYRiGYag/RhdddNFFpz9kIQtZyEKmP1VVVVVVVaU/exSuR+F6pD8UO7ETO7GjP2gvob2E9qI/kiRJkiRJoj+WexphuaehPxEREREREaE/hBBCCCGEoD8AAAAAAACgPwgffPDBB58/Hh4eHh4erj8d1EEd1EGtPxzHcRzHcaw/0LrBFPmsqz8or6G8hvKqPxqkQRqkQao/MzMzMzMzsz+7ErUrUbuyP5IkSZIkSbI/d8QdcUfcsT900UUXXXSxPxEREREREbE/ZCELWchCtj9XEJMriMm1P1VVVVVVVbU/OQUvp+DltD97FK5H4Xq0PxQUFBQUFLQ/FDuxEzuxsz/BeCv7HFKzP2gvob2E9rI/nhLkKUGesj+SJEmSJEmyP3AfwX0E97E/fBphuacRtj/QcFL35bG1P1VVVVVVVbU/yRCso837tD/GGGOMMca4PxiGYRiGYbg/AAAAAAAAuD8YeqEXeqG3P0YXXXTRRbc/jYn0QOXstj+XlpaWlpa2P2QhC1nIQrY/Fl/xFV/xtT9ItMLmQKK1P1VVVVVVVbU/qFChQoUKtT8cTJHPusG0P3sUrkfherQ/XkN5DeU1tD/Oyk+Is/KzP5dv+ZZv+bY/Xi1uwvyutj9mZmZmZma2P6QMPN2aH7Y/25WoXYnatT80dX7tIZe1P1VVVVVVVbU/FRUVFRUVtT82ZU1ZU9a0Py+QSfECmbQ/XXTRRRddtD9CEYpQhCK0P5Q+6ZM+6bM/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - 
"Using the Biopython library in Python, parse the P" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "When you take the average of the standard populati" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "According to github, when was Regression added to " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "You are a telecommunications engineer who wants to" 
- ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "What is the latest chronological year date written" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "The brand that makes these harnesses the dogs are " - ] - ], - "hovertemplate": "agent_name=code_o1_01_february_text
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_01_february_text", - "line": { - "color": "#19d3f3", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_01_february_text", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA6D+amZmZmZnpP1VVVVVVVeU/t23btm3b5j8AAAAAAADoP1VVVVVVVeU/ZmZmZmZm5j9ddNFFF13kP6uqqqqqquI/FDuxEzux4z+SJEmSJEniPxEREREREeE/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP6uqqqqqqto/KVyPwvUo3D+e2Imd2IndPxzHcRzHcdw/btu2bdu23T9HWO5phOXePwAAAAAAAOA/hBBCCCGE4D8AAAAAAADgP3zwwQcffOA/AAAAAAAA4D9QB3VQB3XgPzmO4ziO4+A/whT5rBtM4T95DeU1lNfgP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/R9wRd8Qd4T900UUXXXThPxEREREREeE/C1nIQhay4D/E5ApicgXhP6uqqqqqquA/FbycgpdT4D+kcD0K16PgP/Hw8PDw8OA/sRM7sRM74T9vZZ9DaoLhP3Icx3Ecx+E/CfKUIE8J4j9u27Zt27bhP3AfwX0E9+E/lnsaYbmn4T8NJ3VfHlvhPxEREREREeE/DcE62rxP4T+MMcYYY4zhP1EURVEUReE/AAAAAAAA4T/RC73QC73gP/jggw8++OA/TKQHKme34D/x8PDw8PDgPxM/o8TPKOE/8RVf8RVf4T8OJFphcyDhPzmO4ziO4+A/iREjRowY4T/CFPmsG0zhPxEREREREeE/NpTXUF5D4T/lJ8RZ+QnhP7ETO7ETO+E/BqLSkT0D4T8zMzMzMzPhPyNl4OnW/OA/LFG7ErUr4T9T59ceclnhP0mSJEmSJOE/8fDw8PDw4D8w6Av6gr7gP93TCMs9jeA/XXTRRRdd4D8DF7jABS7gPwAAAAAAAOA/0AIt0AIt4D+GLGQhC1ngP4QQQgghhOA/QUyuICZX4D+yAmGkHSvgP1VVVVVVVeA/8MXVDzoq4D8VvJyCl1PgP3+lQK1fKeA/UrgehetR4D8cUWDSqXngP6GgoKCgoOA/9lttDE134D/sxE7sxE7gP3ACJ3ACJ+A/463sc0hN4D8hVpTGRybgPwAAAAAAAOA/WQKb9pMl4D8AAAAAAADgP04CcaHmJOA/kiRJkiRJ4D/3QwJvPyTgP3
4E9xHcR+A/AkVbDZ4j4D/uaYTlnkbgPzACIzACI+A/AAAAAAAA4D/gKLvfKLvfP3d3d3d3d98/jmVQKky83z8uGYJ1tHnfPzgfg/MxON8/+N5777333j8IrBxaZDvfP7/v+77v+94/0Ofz+Xw+3z8AAAAAAIDfP/AH/AF/wN8/IPiBH/iB3z97a8M0d8HfP4QPPvjgg98/c/TN0TdH3z9FeqBydgvfP5/0SZ/0Sd8/Dw8PDw8P3z/ZLKj2nEzfP/EzSvyMEt8/Rs6w4FLZ3j9f8RVf8RXfP31no76zUd8/lPHbpZ6M3z89QvWZtsbfPwAAAAAAAOA/Dnj84YDH3z8AAAAAAADgP/LX7KhFyN8/AAAAAAAA4D+ZS4QnBcnfPwAAAAAAAOA//iZ/k7/J3z8AAAAAAADgPyB1yh91yt8/cVZ+QpyV3z9hHxf2cWHfP9/yLd/yLd8/PiInCHdj3z9hfleLmzDfPzqkJhhvZd8/mpmZmZmZ3z+P5g82Hs3fP1ikDDzdmt8/sxpFHDpp3z+cj8H5GJzfP2vfsPYNa98/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "According to github, when was Regression added to " - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "When you take the average of the standard populati" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - 
[ - "If there is anything that doesn't make sense in th" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "What is the latest chronological year date written" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "You are given this Excel file as a map. 
You start " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "Of the cities within the United States where U.S. " - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "Look at the attached image. 
The quiz is scored as " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "The attached Excel file 
contains the sales of menu" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "In the film Goldfinger, what color was the object " - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_ablation-toolcalling-manager
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_ablation-toolcalling-manager", - "line": { - "color": "#FF6692", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_ablation-toolcalling-manager", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAAAAAFVVVVVVVdU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/ntiJndiJ3T/btm3btm3bP97d3d3d3d0/AAAAAAAA3D9aWlpaWlraPxzHcRzHcdw/KK+hvIby2j/NzMzMzMzcP57neZ7ned4/F1100UUX3T+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T87sRM7sRPbPxzHcRzHcdw/27Zt27Zt2z9huacRlnvaP7y7u7u7u9s/11prrbXW2j8AAAAAAADcPyebbLLJJts/WlpaWlpa2j/btm3btm3bP6uqqqqqqto/I591gyny2T8or6G8hvLaP1y+5Vu+5ds/MzMzMzMz2z+J2pWoXYnaP3qe53me59k/s6asKWvK2j8vuuiiiy7aP1uwBVuwBds/velNb3rT2z9t1Hc26jvbP6uqqqqqqto/iMb60Fgf2j+amZmZmZnZPxkZGRkZGdk/2Ymd2Imd2D9+DqkJxlvZPy+hvYT2Eto/mpmZmZmZ2T9JkiRJkiTZPzqL6Syms9g/7mmE5Z5G2D+yFUHDSd3XP3d3d3d3d9c/EayjzfsU1z+21lprrbXWP5ZlWZZlWdY/AAAAAAAA1z9XaqVWaqXWP0422WSTTdY/jYn0QOXs1j+Ih4eHh4fXP3PtwFw7MNc/t23btm3b1j8g0QqbA4nWP47jOI7jONY/r169evXq1T/yWTeYIp/VPzCW/GLJL9Y/UV5DeQ3l1T8KcVZ+QpzVP3ZiJ3ZiJ9Y/v6vFTZjf1T9mZmZmZmbWP/PDImXg6dY/onYlalei1j/S1Pm1h1zWP4ZhGIZhGNY/1tXV1dXV1T9lTVlT1pTVP1VVVVVVVdU/F1100UUX1T9ObWpTm9rUP/VJn/RJn9Q/lVEZlVEZ1T9Ob3rTm97UP1VVVVVVVdU/1Hc26jsb1T8mTv2eW+LUP1VVVVVVVdU/Ffji6gcd1T85BS+n4OXUP1VVVVVVVdU/H4XrUbge1T+bB7nrZ4vVP/b19fX19dU/Iz6BVHJe1j92Yid2YifWP9dojdZojdY/n0NqgvFW1j9dy8HNfiHWP0xoL6G9hNY/Y251Ri
rm1j+x9g1r37DWPwJxoeYkENc/t23btm3b1j9WemphpafWP0xnMZ3FdNY/ZCELWchC1j9Y7mmE5Z7WP9ZmbdZmbdY/CRpO6r481j9W6AxW6AzWP2ZmZmZmZtY/fa2eHQI31j9t3qe4ZAjWP2DW+2W9X9Y/MsYYY4wx1j+6SQwCK4fWP7dt27Zt29Y/q9VqtVqt1j8AAAAAAIDWP7UlbUlb0tY/N3IjN3Ij1z/LiD6gOvbWP8omm2yyydY/3Wl1p9Wd1j+NifRA5ezWP61z5QHJOtc/iIeHh4eH1z8cKRrij1vXP3PtwFw7MNc/iOIvcoYF1z+3bdu2bdvWP1uGDtjtsdY/INEKmwOJ1j/Am0eoPtPWP6uqqqqqqtY/QzpvMaTz1j+2bNmyZcvWP6lFyF+zo9Y/yWfdYIp81j+vsjij3cPWP5020GkDndY/tNpZ7ax21j8N5TWU11DWP9aAK9aAK9Y/mRrYO6YG1j/iVSReReLVP3ZiJ3ZiJ9Y/SS9/2kID1j+/q8VNmN/VP9nnkJpgvNU/mpmZmZmZ1T+hu0oxQXfVP1VVVVVVVdU/0j5IBtQz1T8TtStRuxLVP/KUIE8J8tQ/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "Use density measures from the chemistry materials " - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "When you take the average of the standard populati" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "According to github, when was Regression added to " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "You are Van Helsing, a renowned vampire hunter. 
A " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "Of the cities within the United States where U.S. " - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "Hi, I was out 
sick from my classes on Friday, so I" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "What is the latest chronological year date written" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "The attached spreadsheet contains a list of books " - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_fix-print-outputs
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_fix-print-outputs", - "line": { - "color": "#B6E880", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_fix-print-outputs", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAA==", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/ZmZmZmZm5j9GF1100UXnP1VVVVVVVeU/dmIndmIn5j+3bdu2bdvmP1VVVVVVVeU/AAAAAAAA5j+1tLS0tLTkP1VVVVVVVeU/UV5DeQ3l5T9mZmZmZmbmP1VVVVVVVeU/XXTRRRdd5D9Ob3rTm97kPwAAAAAAAOQ/MzMzMzMz4z9iJ3ZiJ3biP3Icx3Ecx+E/SZIkSZIk4T+WexphuafhPyIiIiIiIuI/jDHGGGOM4T8AAAAAAADhP3TRRRdddOE/4uHh4eHh4T+SJEmSJEniP3Icx3Ecx+E/mCKfdYMp4j/zGsprKK/hP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhP2IYhmEYhuE/d8QdcUfc4T8vuuiiiy7iP9InfdInfeI/IQtZyEIW4j9HfWejvrPhPwAAAAAAAOI/aKwPjfWh4T9I4XoUrkfhP5KRkZGRkeE/sRM7sRM74T9vZZ9DaoLhP3Icx3Ecx+E/dNFFF1104T9JkiRJkiThP3UW01lMZ+E/uacRlnsa4T/VfXlsRdDgPxEREREREeE/DcE62rxP4T8IIYQQQgjhPzEMwzAMw+A/AAAAAAAA4T/RC73QC73gP3zwwQcffOA/TKQHKme34D94eHh4eHjgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhP36x5BdLfuE/8xrKayiv4T8De8fUwN7hP9IgDdIgDeI/pSN7BqLS4T+amZmZmZnhP8rA0635YeE/LFG7ErUr4T9T59ceclnhP0mSJEmSJOE/UVFRUVFR4T9f0Bf0BX3hP5Z7GmG5p+E/dNFFF1104T8UoQhFKELhPxEREREREeE/sRM7sRM74T8WspCFLGThPzTRRBNNNOE/BTG5gphc4T9BGGnHCoThP6uqqqqqquE/UoEvrn7Q4T99aKwPjfXhP29nSMzbGeI/PQrXo3A94j+LleEbUWDiPzIyMjIyMuI/Kjkvi/gE4j92Yid2YifiP7If+7Ef++E/UhOMt7LP4T+zX4gVpfHhP3Icx3Ecx+E/1hmpmFud4T+/Ye0b1r7hP18Z2+/o
leE/btu2bdu24T+c6xjFuY7hP/Maymsor+E/HYGirQbP4T+E5Z5GWO7hP9IgDdIgDeI/RdBwUvfl4T9Sdr9Rdr/hP97d3d3d3eE/WQalwsT74T/ep7hkCNbhP/QxOB+D8+E/zjnnnHPO4T9Ei2zn+6nhP2IYhmEYhuE/aTQajUaj4T8AAAAAAMDhP2fMGXPGnOE/oRd6oRd64T9gxQkpeZbhP3TRRRdddOE/LBWxVMRS4T8qZ7fwqzHhP9wUo4a/TeE/aWlpaWlp4T/Ircs74EjhPxaykIUsZOE/P1pNQhR/4T+amZmZmZnhP8afSDileeE/RStsDiRa4T+xEzuxEzvhP8dxHMdxHOE/smsTJbs24T+JESNGjBjhPz801ofG+uA/TJHPusEU4T83YKimYy7hP0jhehSuR+E/ianEVGIq4T/YUF5DeQ3hP9F7JtF7JuE/5SfEWfkJ4T/uUN0O1e3gPyEN0iAN0uA/DVjSy5+24D+fgah0ZM/gP2dAKLlTtOA/mpmZmZmZ4D+aP9h4NH/gP6hb88MiZeA/axRx6KR94D+WqF2J2pXgPw==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "When you take the average of the standard populati" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "According to github, when was Regression added to " - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "This is a secret message my friend gave me. 
It say" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "Look at the attached image. 
The quiz is scored as " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "A 5-man group made up of one tank, one 
healer, and" - ], - [ - "What is the latest chronological year date written" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "As of the 2020 census, what was the population dif" - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_fix-print-outputs2
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_fix-print-outputs2", - "line": { - "color": "#FF97FF", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_fix-print-outputs2", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA6D+amZmZmZnpP6uqqqqqquo/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/mpmZmZmZ6T9GF1100UXnP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkP1VVVVVVVeU/AAAAAAAA5j+1tLS0tLTkP+Q4juM4juM/bCivobyG4j8zMzMzMzPjP/Q8z/M8z+M/6aKLLrro4j84velNb3rjP6uqqqqqquI/MzMzMzMz4z8UO7ETO7HjP2gvob2E9uI/27Zt27Zt4z8Jyz2NsNzjP0REREREROQ/nXPOOeec4z8AAAAAAADjP2WTTTbZZOM/09LS0tLS4j8zMzMzMzPjP+Q4juM4juM/bzBFPusG4z/lNZTXUF7jPxQ7sRM7seM/AAAAAAAA5D9L1K5E7UrkPyVJkiRJkuQ/NmVNWVPW5D9ddNFFF13kP/VJn/RJn+Q/QxaykIUs5D/PRn1no77jPwAAAAAAAOQ/5hS8nIKX4z8zMzMzMzPjP3Nzc3Nzc+M/O7ETO7ET4z/BeCv7HFLjP2gvob2E9uI/MzMzMzMz4z+3bdu2bdviP2wor6G8huI/c08jLPc04j9+eWxF0HDiP6uqqqqqquI/sI4271Nc4j+VUkoppZTiP7Msy7Isy+I/AAAAAAAA4z8zMzMzMzPjP+miiy666OI/w6/GRHqg4j/T0tLS0tLiPzDXDsy1A+M/MzMzMzMz4z+/XerJ+O3iP6uqqqqqquI/kyZNmjRp4j+DKfJZN5jiP8aSXyz5xeI/bCivobyG4j+SJEmSJEniP9IgDdIgDeI/dWTPQFQ64j9mZmZmZmbiP8HTrflhkeI/uxK1K1G74j+Ops6vPeTiP8MwDMMwDOM/09LS0tLS4j+/oC/oC/riP+MFMileIOM/6aKLLrro4j8xhznMYQ7jPzMzMzMzM+M/0y/90i/94j+ykIUsZCHjP+2yyy677OI/TK4gJlcQ4z/PLXHq99ziP6uqqqqqquI/8yQyDdvN4j+n4OUUvJziP2r9SoFav+I/4XoUrkfh4j/zIHf9bLHiP4OCgoKCguI/cl4W8Qmk4j9iJ3ZiJ3biP5IkSZIkSeI/NcF4K/sc4j/2C7GiND7iP+0ltJfQXuI/OyMVc6sz4j9UgjwlyFPiPzUngbhQc+I/kiRJkiRJ4j94+yGBtx
/iP+4juI/gPuI/IQtZyEIW4j9zTyMs9zTiP9IgDdIgDeI/4qTuy2Mr4j+SJEmSJEniP2ZmZmZmZuI/y6BUmHg/4j9Hm/cpLhniP/QxOB+D8+E/zjnnnHPO4T/sUbgehevhP3Icx3Ecx+E/aTQajUaj4T8AAAAAAMDhP3fEHXFH3OE/gh/4gR/44T/lWUb0AdXhP/DBBx988OE/3xx9c/TN4T92C78aE+nhPz0gWefKA+I/Hh4eHh4e4j/8cevyDjjiPyV+RomfUeI/oBvz9NFq4j+SJEmSJEniP8oVxOQKYuI/U0/Gb5d64j9EhnsVzJLiPxzHcRzHceI/bBMluzZR4j8SI0aMGDHiP5IkSZIkSeI/mCKfdYMp4j/TMZcITwriPyIiIiIiIuI/kuZIc6Q54j+vobyG8hriP1Kn/FGn/OE/y0+Is/IT4j/2cWEfF/bhP4qd2Imd2OE/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "When you take the average of the standard populati" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "According to github, when was Regression added to " - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "You are Van Helsing, a renowned vampire hunter. 
A " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "What is the last word before the second chorus of " - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "What is the minimum number of page links a person " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "What is the latest chronological year date written" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "Where were the Vietnamese specimens described 
by K" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "Using the Biopython library in Python, parse the P" - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_goodoldtext-unbroken
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_goodoldtext-unbroken", - "line": { - "color": "#FECB52", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_goodoldtext-unbroken", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAA==", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/ZmZmZmZm5j9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j+3bdu2bdvmP3d3d3d3d+c/AAAAAAAA5j+XlpaWlpbmP8dxHMdxHOc/UV5DeQ3l5T9mZmZmZmbmP7dt27Zt2+Y/0UUXXXTR5T9Ob3rTm97kP1VVVVVVVeU/w/UoXI/C5T/FTuzETuzkP1VVVVVVVeU/btu2bdu25T98GmG5pxHmP1VVVVVVVeU/rbXWWmut5T8AAAAAAADlP1VVVVVVVeU/tbS0tLS05D91UAd1UAflPxzHcRzHceQ/HEyRz7rB5D/YUF5DeQ3lPzVIgzRIg+Q/zczMzMzM5D9L1K5E7UrkPyVJkiRJkuQ/NmVNWVPW5D9ddNFFF13kP/VJn/RJn+Q/QxaykIUs5D/PRn1no77jPwAAAAAAAOQ/5hS8nIKX4z/Xo3A9CtfjP3Nzc3Nzc+M/O7ETO7ET4z/7HFITjLfiP+0ltJfQXuI/CfKUIE8J4j9u27Zt27bhP3AfwX0E9+E/lnsaYbmn4T8NJ3VfHlvhP5qZmZmZmeE/3qe4ZAjW4T8RQgghhBDiP3Icx3Ecx+E/AAAAAAAA4j+SG7mRG7nhP/DBBx988OE/CCpnt/Cr4T/i4eHh4eHhPyELWchCFuI/kiRJkiRJ4j9TT8Zvl3riP47jOI7jOOI/kyZNmjRp4j+YIp91gyniP+xRuB6F6+E/r6G8hvIa4j8De8fUwN7hP9IgDdIgDeI/dWTPQFQ64j8AAAAAAADiPxl4ujU/LOI/9DE4H4Pz4T/xRlPn1x7iP5IkSZIkSeI/cnJycnJy4j+PuCPuiDviP7xAJsULZOI/jC666KKL4j8rWclKVrLiP4Mt2IIt2OI/0y/90i/94j+ykIUsZCHjP+2yyy677OI/C2JyBTG54j/PLXHq99ziP6uqqqqqquI/8yQyDdvN4j+8nIKXU/DiP2r9SoFav+I/4XoUrkfh4j9brAzfiALjPyMjIyMjI+M/FvEJpJLz4j9P7MRO7MTiP3Mpl3Ipl+I/GG9ln0Nq4j85uNkvxIriP+0ltJfQXuI/OyMVc6sz4j9UgjwlyFPiP5gin3WDKe
I/AAAAAAAA4j+Kcx2jONfhP3AfwX0E9+E/IQtZyEIW4j+E5Z5GWO7hP3Icx3Ecx+E/RdBwUvfl4T9yTQRyTQTiP97d3d3d3eE/52v17BC44T/ep7hkCNbhP7GRDhvpsOE/zjnnnHPO4T9Ei2zn+6nhP2IYhmEYhuE/WSwWi8Vi4T8AAAAAAIDhP2fMGXPGnOE/khu5kRu54T9gxQkpeZbhP3TRRRdddOE/BhkXZFyQ4T8IKme38KvhP3Icx3Ecx+E/pqWlpaWl4T/ij1uXd8DhPxolfkaJn+E/l8r2rgO64T+amZmZmZnhP8afSDileeE/ezJ+u9ST4T900UUXXXThP+Q4juM4juE/pPMWQzpv4T+MGDFixIjhP1uE/DU7auE/whT5rBtM4T83YKimYy7hP0jhehSuR+E/ianEVGIq4T/YUF5DeQ3hP9F7JtF7JuE/rfyEOCs/4T8j8SoSryLhP7ETO7ETO+E/OUG4G/se4T8GotKRPQPhP+vSY/5eG+E/MzMzMzMz4T/ti6jW2RfhPw==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "When you take the average of the standard populati" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "According to github, when was Regression added to " - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "What is the minimum number of page links a person " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], 
- [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "What is the latest chronological year date written" - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_remove-navigational
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_remove-navigational", - "line": { - "color": "#636efa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_remove-navigational", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAA==", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/FDuxEzux4z+SJEmSJEniPzMzMzMzM+M/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j8AAAAAAADgPzEMwzAMw+A/dNFFF1104T8hC1nIQhbiP6uqqqqqquI/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/SZIkSZIk4T+WexphuafhPyIiIiIiIuI/lVJKKaWU4j8AAAAAAADiP22yySabbOI/09LS0tLS4j+SJEmSJEniP3Icx3Ecx+E/mCKfdYMp4j/zGsprKK/hP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/GPQFfUFf4D+66KKLLrrgPxEREREREeE/FrKQhSxk4T/E5ApicgXhP1VVVVVVVeE/aKwPjfWh4T/sUbgehevhP5KRkZGRkeE/sRM7sRM74T+pCcZb2efgP/cS2ktoL+E/37D2DWvf4D9JkiRJkiThP3UW01lMZ+E/lnsaYbmn4T8NJ3VfHlvhP5qZmZmZmeE/DcE62rxP4T8IIYQQQgjhP1EURVEUReE/AAAAAAAA4T/RC73QC73gP/jggw8++OA/TKQHKme34D/x8PDw8PDgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhPxEREREREeE/eQ3lNZTX4D/lJ8RZ+QnhP7ETO7ETO+E/1uImzO9q4T8zMzMzMzPhPyNl4OnW/OA/yOB8DM7H4D+FN5o6v/bgP0mSJEmSJOE/UVFRUVFR4T9f0Bf0BX3hPwOZFC+QSeE/dNFFF1104T8UoQhFKELhP8EWbMEWbOE/UhmVURmV4T8WspCFLGThPzTRRBNNNOE/xOQKYnIF4T95DeU1lNfgP6uqqqqqquA/sd0sTyLT4D8qeDkFL6fgP3o7Q2LezuA/9ihcj8L14D/sZ4uV4RvhP0FBQUFBQeE/PoFUcl4W4T+xEzuxEzvhP/EVX/EVX+E/b2WfQ2qC4T9ws1+IFaXhP3Icx3Ecx+E/1hmpmFud4T900UUXXXThP8IU
+awbTOE/27Zt27Zt4T+c6xjFuY7hP3UW01lMZ+E/FG01eI5A4T+oEZZ7GmHhP7ETO7ETO+E/cVL35bEV4T/x8PDw8PDgP83MzMzMzOA/HgI3lkGp4D/S5n2KS4bgP6cQaAqBpuA/xhhjjDHG4D+kcD0K16PgPzEMwzAMw+A/OBwOh8Ph4D8AAAAAAADhP0fcEXfEHeE/sRM7sRM74T9WnJCSZxnhP/jggw8++OA/UxFLRSwV4T+7hV+NifTgPxEREREREeE/8fDw8PDw4D+7vAOOFA3hPw/MtQNz7eA/jnn6aDUJ4T9JkiRJkiThP8TkCmJyBeE/DiRaYXMg4T+xEzuxEzvhP1VVVVVVVeE/pPMWQzpv4T8LFSpUqFDhP01c6d6AMuE/whT5rBtM4T+eFCR/XmXhP36x5BdLfuE/jVvGLeOW4T+U11BeQ3nhP5KRkZGRkeE/dNFFF1104T+MMcYYY4zhP0IapEEapOE/+x6RE4S74T8+A1HpyJ7hP29ln0NqguE/ZmZmZmZm4T9epZigu0rhP/cS2ktoL+E/fJu/wqxG4T8sUbsStSvhPw==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "According to github, when was Regression added to " - ], - [ - "When you take the average of the standard populati" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "Of 
the authors (First M. Last) that worked on the " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "You are Van Helsing, a renowned vampire hunter. 
A " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "The year is 2022. 
I am at the National Air and Spa" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "What is the latest chronological year date written" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "What was the complete title of the book in which t" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "At the two-minute mark in the YouTube video upload" - ] - ], - "hovertemplate": "agent_name=code_o1_03_february_text_high-reasoning-effort
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_03_february_text_high-reasoning-effort", - "line": { - "color": "#EF553B", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_03_february_text_high-reasoning-effort", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVdU/AAAAAAAA4D+amZmZmZnZPwAAAAAAAOA/27Zt27Zt2z8AAAAAAADYPxzHcRzHcdw/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgP97d3d3d3d0/AAAAAAAA3D8eHh4eHh7ePxzHcRzHcdw/KK+hvIby2j+amZmZmZnZP9u2bdu2bds/L7rooosu2j+96U1vetPbP6uqqqqqqto/mpmZmZmZ2T/ZiZ3YiZ3YPy+hvYT2Eto/27Zt27Zt2z9huacRlnvaP5qZmZmZmdk/xhhjjDHG2D8AAAAAAADaPyebbLLJJts/PDw8PDw83D8d1EEd1EHdPxzHcRzHcdw/0LrBFPms2z8or6G8hvLaPxqkQRqkQdo/mpmZmZmZ2T+J2pWoXYnaP9u2bdu2bds/s6asKWvK2j+jiy666KLbP1uwBVuwBds/velNb3rT2z9t1Hc26jvbPwAAAAAAANw/27Zt27Zt2z/hehSuR+HaP5ybm5ubm9s/O7ETO7ET2z8KxlvZ55DaPya0l9BeQts/w9o3rH3D2j/btm3btm3bPx/BfQT3Edw/GmG5pxGW2z8EDSd1Xx7bP7y7u7u7u9s/Q7CONu9T3D/nnHPOOefcPxzHcRzHcdw/AAAAAAAA3D/dyI3cyI3cPx988MEHH9w/NSbSA5Wz2z9LS0tLS0vbP64dmGsH5to/27Zt27Zt2z8yfrvUk/HbPxzHcRzHcdw/4MCBAwcO3D/QusEU+azbPylcj8L1KNw/oryG8hrK2z/5CXFWfkLcP33Lt3zLt9w/VDqyZyAq3T/NzMzMzMzcP2t+WKQMPN0/qV2J2pWo3T/3kMuKgRLeP27btm3btt0/Hh4eHh4e3j9xR9wRd8TdPyCT4gUyKd4/jC666KKL3j/vda973evePz/pkz7pk94/3uM93uM93j/f9KY3vendP5dddtlll90/eDbqOxv13T9G2rECYaTdPwAAAAAAAN4/3ixPItOw3T+Dl1PwcgrePw2JeTtDYt4/uB6F61G43j/IXT9brAzfP19fX19fX98/FEgl52UR3z8ndmIndmLfPyD7sR/7sd8/OqQmGG9l3z83+4VYURrfPwntJbSX0N4/hOjxXTiI3j
/WvmHtG9beP/DolbH9jt4/kiRJkiRJ3j9bWOmphZXePwnuI7iP4N4/6k1vetOb3j9HWO5phOXePx/qoR7qod4/VwQNJ3Vf3j9fzKdezKfeP2ZmZmZmZt4/4MYyKBUm3j+KS4ZgHW3ePy6e3OLJLd4/dM4555xz3j+4HoXrUbjeP57neZ7ned4/j8fj8Xg83j8AAAAAAADeP3FH3BF3xN0/ntiJndiJ3T9Ux97aMM3dP5NNNtlkk90/Wt1pdafV3T+K9EDl7BbeP97d3d3d3d0/Hh4eHh4e3j+kaIg/bl3eP+JnlPgZJd4/le1dB3Rj3j++4iu+4iveP3rxJxJOad4/u9ST8dul3j+qz7Q1/m7eP47jOI7jON4/Y0jnLYZ03j/16tWrV6/eP7o3oExc6d4/PusGU+Sz3j8uEZ4UJH/eP7gehetRuN4/+MJ74b3w3j+H8hrKayjfP59J9J5J9N4/4qz8hDgr3z/43nvvvffeP0/sxE7sxN4/EjlBuBv73j9hfleLmzDfPzqkJhhvZd8/mpmZmZmZ3z+P5g82Hs3fP1ikDDzdmt8/sxpFHDpp3z84H4PzMTjfPwgffPDBB98/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "When you take the average of the standard populati" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "I need to fact-check a citation. 
This is the citat" - ], - [ - "What is the minimum number of page links a person " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "According to github, when was Regression added to " - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "Review the chess position provided in 
the image. I" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "Look at the attached image. 
The quiz is scored as " - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "On Cornell Law School website's legal 
information " - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "When was a picture of St. 
Thomas Aquinas first add" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "What is the latest chronological year date written" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "What is the average number of pre-2020 works on th" - ] - ], - "hovertemplate": "agent_name=code_o1_04_february_submission
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_04_february_submission", - "line": { - "color": "#00cc96", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_04_february_submission", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D+amZmZmZnpP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/AAAAAAAA4D900UUXXXThPwAAAAAAAOA/sRM7sRM74T+SJEmSJEniPxEREREREeE/AAAAAAAA4j/T0tLS0tLiP+Q4juM4juM/XkN5DeU15D/NzMzMzMzkP/Q8z/M8z+M/XXTRRRdd5D84velNb3rjP6uqqqqqquI/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/kiRJkiRJ4j9PIyz3NMLiPzMzMzMzM+M/lVJKKaWU4j8AAAAAAADjP22yySabbOI/09LS0tLS4j8zMzMzMzPjP6uqqqqqquI/bzBFPusG4z9sKK+hvIbiP9IgDdIgDeI/ZmZmZmZm4j+7ErUrUbviP5IkSZIkSeI/p6wpa8qa4j/poosuuujiP9InfdInfeI/IQtZyEIW4j9HfWejvrPhP1VVVVVVVeE/PzTWh8b64D9I4XoUrkfhP/Hw8PDw8OA/2Ymd2Imd4D+pCcZb2efgP3sJ7SW0l+A/SpCnBHlK4D8AAAAAAADgP34E9xHcR+A/3dMIyz2N4D+c1H15bEXgPwAAAAAAAOA/afM+xSVD4D+EEEIIIYTgPxAEQRAEQeA/AAAAAACA4D/RC73QC73gP3zwwQcffOA/b+FXYyI94D94eHh4eHjgPwtZyEIWsuA/oQ7qoA7q4D8OJFphcyDhP1VVVVVVVeE/jBgxYsSI4T/CFPmsG0zhPxEREREREeE/eQ3lNZTX4D9WfkKclZ/gP5AGaZAGaeA/N2F+V4ub4D/NzMzMzMzgP3sJ7SW0l+A/yOB8DM7H4D+2h1xWDJTgPxiGYRiGYeA/kZCQkJCQ4D8w6Av6gr7gP93TCMs9jeA/uuiiiy664D8Oc5jDHObgP2ELtmALtuA/cQiHcAiH4D+GLGQhC1ngPyywwAILLOA/QUyuICZX4D8WCCPtWIHgP1VVVVVVVeA/8MXVDzoq4D8VvJyCl1PgP3+lQK1fKeA/AAAAAAAA4D+YdGoe5K7fP19fX19fX98/XG0MTXew3z8ndmIndmLfPyD7sR/7sd8/AAAAAAAA4D+9U9dycLPfPwAAAAAAAOA/TvvJEti03z9r37D2DWvfPyryWTeYIt8/27Zt27Zt3z8SePshgb
ffPwT3EdxHcN8//HVJ5cO43z8jLPc0wnLfP6D7uZ/7ud8/yFYEDSd13z/gKLvfKLvfPwAAAAAAAOA/jmVQKky83z8AAAAAAADgPyHQFAJNIeA/AAAAAAAA4D9YObTIdr7fP9/3fd/3fd8/0Ofz+Xw+3z8AAAAAAADfP9AX9AV9Qd8/P/ADP/AD3z9xQkqeZUTfPwgffPDBB98/c/TN0TdH3z9FeqBydgvfP5/0SZ/0Sd8/Dw8PDw8P3z/ZLKj2nEzfP/EzSvyMEt8/964DujFP3z9f8RVf8RXfP3usZeiA3d4/u9ST8dul3j8wS8oBkeHeP8dxHMdxHN8/Kmj1pYJW3z/58ePHjx/fP7o3oExc6d4/KvJZN5gi3z/L4ox2D1vfP5NfLPnFkt8//iZ/k7/J3z9DeQ3lNZTfPyB1yh91yt8/cVZ+QpyV3z/LX7L8JcvfPwAAAAAAAOA/S3r50xYa4D8AAAAAAADgP742Yl16zN8/AAAAAAAA4D+P5g82Hs3fP1ikDDzdmt8/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In terms of geographical distance between capital " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "When you take the average of the standard populati" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "It is 1999. 
Before you party like it is 1999, plea" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "According to github, when was Regression added to " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - 
"Compute the check digit the Tropicos ID for the Or" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "You are given this Excel file as a map. 
You start " - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "What is the last word before the second chorus of " - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ] - ], - "hovertemplate": "agent_name=code_o1_04_february_submission-medium
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_04_february_submission-medium", - "line": { - "color": "#ab63fa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_04_february_submission-medium", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3w=", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADkP3Icx3Ecx+E/MzMzMzMz4z9ddNFFF13kP1VVVVVVVeU/dmIndmIn5j8lSZIkSZLkP1VVVVVVVeU/AAAAAAAA5D/T0tLS0tLiP3Icx3Ecx+E/bCivobyG4j+amZmZmZnhP5IkSZIkSeI/6aKLLrro4j8hC1nIQhbiP1VVVVVVVeE/7FG4HoXr4T+xEzuxEzvhP3Icx3Ecx+E/kiRJkiRJ4j9PIyz3NMLiPyIiIiIiIuI/lVJKKaWU4j8AAAAAAADiP3TRRRdddOE/4uHh4eHh4T/xFV/xFV/hPzmO4ziO4+A/6wZT5LNu4D95DeU1lNfgP7ETO7ETO+E/zczMzMzM4D8sUbsStSvhPzEMwzAMw+A/R9wRd8Qd4T+66KKLLrrgP7AFW7AFW+A/C1nIQhay4D/E5ApicgXhP6uqqqqqquA/FbycgpdT4D+kcD0K16PgP/Hw8PDw8OA/2Ymd2Imd4D+pCcZb2efgP3sJ7SW0l+A/37D2DWvf4D8lSZIkSZLgP3kN5TWU1+A/3dMIyz2N4D+c1H15bEXgPwAAAAAAAOA/afM+xSVD4D+EEEIIIYTgPzEMwzAMw+A/AAAAAACA4D/wAz/wAz/gP3zwwQcffOA/b+FXYyI94D94eHh4eHjgPwRz7cBcO+A/UAd1UAd14D82BxKtsDngPxzHcRzHceA/ggMHDhw44D8AAAAAAADgP5NfLPnFkt8/AAAAAAAA4D/H1MDeMTXgPwAAAAAAAOA/Mb+rxU2Y3z8AAAAAAADgP1ikDDzdmt8/OB+D8zE43z+U8EZT59feP57neZ7ned4/Hh4eHh4e3j+hL+gL+oLePyCT4gUyKd4/jC666KKL3j/vda973evePz/pkz7pk94/3uM93uM93j/f9KY3vendP5dddtlll90/eDbqOxv13T9G2rECYaTdPwAAAAAAAN4/nkSmYbtZ3j+sD431obHePw2JeTtDYt4/FK5H4XoU3j8pMOnUPMjdPx4eHh4eHt4/zCI+gVRy3j92Yid2YifeP57neZ7ned4/dEhNMN7K3j83+4VYURrfP4X2EtpLaN8/6fFdOIge3z9r37D2DWvfP2P7Hb0ytt8/27Zt27Zt3z8SePshgbffPwAAAAAAAOA//HVJ5cO43z8jLPc0wnLfP9/yLd/yLd8/yFYEDSd13z+fejGfejHfP+/u7u7u7t4/xfuR03yt3j9cMgTraPPePzgfg/MxON8/fO+999573z8IrBxaZDvfPw==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "When you take the average of the standard 
populati" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "According to github, when was Regression added to " - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "It is 1999. 
Before you party like it is 1999, plea" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ] - ], - "hovertemplate": "agent_name=code_o1_04_february_submission3
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_04_february_submission3", - "line": { - "color": "#FFA15A", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_04_february_submission3", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMA==", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/kiRJkiRJ4j8AAAAAAADkP1VVVVVVVeU/ZmZmZmZm5j9ddNFFF13kP6uqqqqqquI/sRM7sRM74T8AAAAAAADgPxEREREREeE/AAAAAAAA4j/x8PDw8PDgPwAAAAAAAOA/DeU1lNdQ3j8AAAAAAADgP57neZ7ned4/AAAAAAAA4D/qTW9605vePwAAAAAAAOA/pHA9Ctej4D8AAAAAAADgPwntJbSX0N4/btu2bdu23T9HWO5phOXePwAAAAAAAOA/hBBCCCGE4D8AAAAAAADgPwgffPDBB98/AAAAAAAA4D9QB3VQB3XgPwAAAAAAAOA/6wZT5LNu4D8AAAAAAADgP5AGaZAGaeA/AAAAAAAA4D9kcD4G52PgPwAAAAAAAOA/0Bf0BX1B3z8AAAAAAADgP7AFW7AFW+A/AAAAAAAA4D99Z6O+s1HfPwAAAAAAAOA/1ofG+tBY3z8=", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "When you take the average of the standard populati" - ], - [ - "The object in the British Museum's collection with" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ] - ], - "hovertemplate": "agent_name=code_o1_04_february_submission4
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_04_february_submission4", - "line": { - "color": "#19d3f3", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_04_february_submission4", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQF", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjPwAAAAAAAOA/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "When you take the average of the standard populati" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "Review the chess position provided in the image. 
I" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "According to Google Finance, when was the first ye" - ], 
- [ - "What time was the Tri-Rail train that carried the " - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "What is the last word before the second chorus of " - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "What is the final numeric output from the attached" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "A 5-man group made up of one tank, one healer, and" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "What is the latest chronological year date written" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - 
"Who are the pitchers with the number before and af" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ] - ], - "hovertemplate": "agent_name=code_o1_04_february_submission5
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_04_february_submission5", - "line": { - "color": "#FF6692", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_04_february_submission5", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVeU/AAAAAAAA4D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADoPzmO4ziO4+g/mpmZmZmZ6T8vuuiiiy7qP6uqqqqqquo/O7ETO7ET6z9JkiRJkiTpP5qZmZmZmek/AAAAAAAA6j94eHh4eHjoP8dxHMdxHOc/Q3kN5TWU5z8AAAAAAADoPxiGYRiGYeg/RhdddNFF5z9kIQtZyELmP1VVVVVVVeU/exSuR+F65D8UO7ETO7HjP2gvob2E9uI/27Zt27Zt4z8Jyz2NsNzjPzMzMzMzM+M/lVJKKaWU4j8AAAAAAADjP22yySabbOI/09LS0tLS4j+SJEmSJEniP6uqqqqqquI/bzBFPusG4z/lNZTXUF7jPxQ7sRM7seM/AAAAAAAA5D9L1K5E7UrkP/Q8z/M8z+M/Bn1BX9AX5D+jiy666KLjPzMzMzMzM+M/OL3pTW964z9MriAmVxDjP1VVVVVVVeM/5hS8nIKX4z/Xo3A9CtfjPxQUFBQUFOQ/7MRO7MRO5D9NMN7KPofkP9pLaC+hveQ/XXTRRRdd5D8lSZIkSZLkP15DeQ3lNeQ/5p5GWO5p5D9fHlsRNJzkP0REREREROQ/JkOwjjbv4z+dc84555zjP/Q8z/M8z+M/AAAAAACA4z8zMzMzMzPjP+miiy666OI/oHJ2C78a4z9LS0tLS0vjPzDXDsy1A+M/4yu+4iu+4j9TT8Zvl3riP47jOI7jOOI/kB8/fvz44T+YIp91gyniP1nyiyW/WOI/bCivobyG4j8hzspPiLPiP/Mt3/It3+I/E+Z3tbgJ4z/NzMzMzMziP8HTrflhkeI/V6J2JWpX4j+/9pDLioHiP5IkSZIkSeI/EhISEhIS4j+PuCPuiDviP7xAJsULZOI/L7rooosu4j8g/ehHP/rhPyIiIiIiIuI/kiRJkiRJ4j+nN73pTW/iP5VSSimllOI/yhXE5Api4j9sKK+hvIbiP1VVVVVVVeI/EpmG7WZ54j+n4OUUvJziP2r9SoFav+I/j8L1KFyP4j/zIHf9bLHiP9PS0tLS0uI/cl4W8Qmk4j9iJ3ZiJ3biP5IkSZIkSeI/GG9ln0Nq4j/2C7GiND7iP+0ltJfQXuI/OyMVc6sz4j8J8pQgTwniP5gin3WDKeI/kiRJkiRJ
4j94+yGBtx/iP3AfwX0E9+E/HYGirQbP4T+E5Z5GWO7hP9IgDdIgDeI/4qTuy2Mr4j9yTQRyTQTiPyIiIiIiIuI/y6BUmHg/4j+wjjbvU1ziPzbSYSMdNuI/U0oppZRS4j+TGARWDi3iP4IgCIIgCOI/iUQikUgk4j8AAAAAAADiP3fEHXFH3OE/gh/4gR/44T9q7oK/ihPiPy+66KKLLuI/kiRJkiRJ4j/l7BZ+NSbiPz0gWefKA+I/Hh4eHh4e4j/8cevyDjjiPyV+RomfUeI/oBvz9NFq4j87qIM6qIPiP8oVxOQKYuI/HUi0wuZA4j++CmZJOSDiP47jOI7jOOI/eoshnbcY4j8SI0aMGDHiP5IkSZIkSeI/DqbIZ91g4j+imo65RHjiP1nyiyW/WOI/kuZIc6Q54j8N5TWU11DiPxK9ZxK9Z+I/kiRJkiRJ4j8rEq8i8SriP9IgDdIgDeI/kROEu7Hv4T8NRKUjewbiP/P32oh16eE/zczMzMzM4T+w8Wj+YOPhP0bKwNOt+eE/yYB6pnLd4T/C+Ricj8HhP6YxYBoDpuE/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "A paper about AI regulation that was originally su" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "When you take the average of the standard populati" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "In the endnote found in the second-to-last paragra" - ] - ], - "hovertemplate": "agent_name=code_o1_22-01_managedagent-summary_planning
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_22-01_managedagent-summary_planning", - "line": { - "color": "#B6E880", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_22-01_managedagent-summary_planning", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQg==", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA6D8zMzMzMzPjP1VVVVVVVeU/t23btm3b5j8AAAAAAADkP3Icx3Ecx+E/AAAAAAAA4D8XXXTRRRfdPwAAAAAAAOA/sRM7sRM74T8AAAAAAADgPxEREREREeE/AAAAAAAA4D8eHh4eHh7ePxzHcRzHcdw/KK+hvIby2j+amZmZmZnZPxiGYRiGYdg/RhdddNFF1z+RhSxkIQvZPwAAAAAAANg/mpmZmZmZ2T/ZiZ3YiZ3YP0J7Ce0ltNc/t23btm3b1j98GmG5pxHWP1VVVVVVVdU/pZRSSiml1D8AAAAAAADUP2WTTTbZZNM/tbS0tLS01D8WX/EVX/HVP1VVVVVVVdU/yWfdYIp81j9DeQ3lNZTXP9mJndiJndg/mpmZmZmZ2T/6GJyPwfnYP3qe53me59k/s6asKWvK2j8vuuiiiy7aP5qZmZmZmdk/pze96U1v2j9t1Hc26jvbPwAAAAAAANw/27Zt27Zt2z/hehSuR+HaP1paWlpaWto/O7ETO7ET2z+WfQ6pCcbbPxzHcRzHcdw/F1100UUX3T8lSZIkSZLcPxbTWUxnMd0/jbDc0wjL3T/msRVBw0ndP83MzMzMzNw/Q7CONu9T3D/fe++9997bP9u2bdu2bds/AAAAAAAA2z9bqZVaqZXaPyebbLLJJts/eqBydgu/2j8=", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "A paper about AI regulation that was originally su" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What are the EC 
numbers of the two most commonly u" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "When you take the average of the standard populati" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "It is 1999. 
Before you party like it is 1999, plea" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Could you help me out with this assignment? Our pr" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In the year 2022, and before December, what does \"" - ] - ], - "hovertemplate": "agent_name=code_o1_25-01_visioon
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_25-01_visioon", - "line": { - "color": "#FF97FF", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_25-01_visioon", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ=", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAA8D8AAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnJP1VVVVVVVdU/27Zt27Zt2z8AAAAAAADYP1VVVVVVVdU/MzMzMzMz0z900UUXXXTRP1VVVVVVVdU/2Ymd2Imd2D+3bdu2bdvWP5qZmZmZmdk/AAAAAAAA2D+XlpaWlpbWPzmO4ziO49g/Q3kN5TWU1z9mZmZmZmbWP1VVVVVVVdU/XXTRRRdd1D9kIQtZyELWP1VVVVVVVdU/exSuR+F61D92Yid2YifWP1VVVVVVVdU/JUmSJEmS1D8Jyz2NsNzTPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADSP3TRRRdddNE/09LS0tLS0j/UQR3UQR3UP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP1VVVVVVVdU/ZmZmZmZm1j/blahdidrVP1VVVVVVVdU/lTVlTVlT1j/RRRdddNHVP1VVVVVVVdU/ZCELWchC1j9dQUyuICbXP6uqqqqqqtY/jfWhsT401j/D9Shcj8LVP1VVVVVVVdU/xU7sxE7s1D/Z55CaYLzVPw==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "A paper about AI regulation that was originally su" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "The object in the British Museum's collection with" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "Use density measures from the 
chemistry materials " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "Of the authors (First M. Last) that worked on the " - ], - [ - "When you take the average of the standard populati" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Look at the attached image. 
The quiz is scored as " - ], - [ - "How many edits were made to the Wikipedia page on " - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "You are Van Helsing, a renowned vampire hunter. A " - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "The attached image contains a Python script. Run t" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "What is the latest chronological year date written" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ] - ], - "hovertemplate": "agent_name=code_o1_29-01_text
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o1_29-01_text", - "line": { - "color": "#FECB52", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o1_29-01_text", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdo", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVeU/AAAAAAAA4D+amZmZmZnZPwAAAAAAAOA/kiRJkiRJ4j8AAAAAAADgPxzHcRzHcdw/mpmZmZmZ2T9GF1100UXXP1VVVVVVVdU/2Ymd2Imd2D+3bdu2bdvWP5qZmZmZmdk/AAAAAAAA3D9aWlpaWlraPzmO4ziO49g/Q3kN5TWU1z9mZmZmZmbWP1VVVVVVVdU/XXTRRRdd1D9kIQtZyELWP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j+WexphuafRPxEREREREdE/hBBCCCGE0D8AAAAAAADQPwgffPDBB88/8fDw8PDw0D+SJEmSJEnSP+Q4juM4jtM/HEyRz7rB1D9RXkN5DeXVP5dv+ZZv+dY/AAAAAAAA2D9qV6J2JWrXPxiGYRiGYdg/9AV9QV/Q1z9GF1100UXXPxdswRZswdY/etOb3vSm1z9icgUxuYLYPwAAAAAAANg/4eUUvJyC1z8K16NwPQrXP5eWlpaWltY/dmIndmIn1j9ln0NqgvHWP0J7Ce0ltNc/cFj7hrVv2D9JkiRJkiTZPzGdxXQW09k/YbmnEZZ72j+Uui+PrQjaP5qZmZmZmdk/WEeb9yku2T/GGGOMMcbYPxiGYRiGYdg/AAAAAAAA2D8YeqEXeqHXPz744IMPPtg/SQ9Uzm7h1z+Ih4eHh4fXP4K5dmCuHdg/+Yqv+Iqv2D/RCpsDiVbYPwAAAAAAANg/vXr16tWr1z+fdYMp8lnXP+UXS36x5Nc/Q3kN5TWU1z9kamDvmBrYP9mJndiJndg/OrJnICod2T/NzMzMzMzYP5Ey8HRrftg/Mjgfg/Mx2D+q82sPuazYPxiGYRiGYdg/GBgYGBgY2D8k7og74o7YP+5phOWeRtg/AAAAAAAA2D983ete97rXP9iCLdiCLdg/2Ymd2Imd2D+GLGQhC1nYP8YYY4wxxtg/YnIFMbmC2D8LhJF2rEDYPwAAAAAAANg/2G6WJ5Fp2D801ofG+tDYPzbZZJNNNtk/mpmZmZmZ2T96kLt+tljZP7q5ubm5udk/i/gEUsl52T+xEzuxEzvZP9mP/diP/dg/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "What's the last line of the rhyme under the flavor" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "A paper about AI regulation that was originally su" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "In Valentina Reโ€™s contribution to the 2017 book โ€œW" - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "Compute the check digit the Tropicos ID for the Or" - ], - [ - "Could you help me out with this assignment? 
Our pr" - ], - [ - "Given this table defining * on the set S = {a, b, " - ], - [ - "What time was the Tri-Rail train that carried the " - ], - [ - "In the fictional language of Tizin, basic sentence" - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "In terms of geographical distance between capital " - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "I was trying to remember how well the Cheater Beat" - ], - [ - "The attached file contains a list of vendors in th" - ], - [ - "Review the chess position provided in the image. I" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "Who nominated the only Featured Article on English" - ], - [ - "The Latin root of the Yola word \"gimlie\" shares a " - ], - [ - "The attached file shows a list of books in the col" - ], - [ - "According to Google Finance, when was the first ye" - ], - [ - "Using bass clef notes, what is the age of someone " - ], - [ - "On a leap day before the year 2008, a joke was rem" - ], - [ - "On July 15, 2008, Phys.org published an article ab" - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "If there is anything that doesn't make sense in th" - ], - [ - "When you take the average of the standard populati" - ], - [ - "The following numbers function similarly to ISBN 1" - ], - [ - "In the year 2022, and before December, what does \"" - ], - [ - "What is the volume in milliliters of a system comp" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "The attached file lists accommodations in the reso" - ], - [ - "In the NIH translation of the original 1913 Michae" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ], - [ - "You are Van Helsing, a renowned vampire hunter. 
A " - ], - [ - "Find the value of x to the nearest tenth: Lx = (d/" - ], - [ - "You are a telecommunications engineer who wants to" - ], - [ - "According to Box Office Mojo's 2020 Worldwide Box " - ], - [ - "How many applicants for the job in the PDF are onl" - ], - [ - "As of the 2020 census, what was the population dif" - ], - [ - "The Metropolitan Museum of Art has a portrait in i" - ], - [ - "How many slides in this PowerPoint presentation me" - ], - [ - "This is a secret message my friend gave me. It say" - ], - [ - "According to wikipedia, how many Asian countries s" - ], - [ - "The work referenced in footnote 397 of Federico La" - ], - [ - "I was referencing each of the tables in the file f" - ], - [ - "In Nature journal's Scientific Reports conference " - ], - [ - "The attached file shows the locomotives in the col" - ], - [ - "How many nonindigenous crocodiles were found in Fl" - ], - [ - "As a comma separated list with no whitespace, usin" - ], - [ - "According to the World Bank, which countries had g" - ], - [ - "The attached spreadsheet contains the sales of men" - ], - [ - "Who composed the song that was performed by a roos" - ], - [ - "I'm making a grocery list for my mom, but she's a " - ], - [ - "According to github, when was Regression added to " - ], - [ - "In the 2018 VSCode blog post on replit.com, what w" - ], - [ - "Look at the attached image. The quiz is scored as " - ], - [ - "What writer is quoted by Merriam-Webster for the W" - ], - [ - "Examine the video at https://www.youtube.com/watch" - ], - [ - "Hi, I'm making a pie but I could use some help wit" - ], - [ - "In the Scikit-Learn July 2017 changelog, what othe" - ], - [ - "You are given this Excel file as a map. You start " - ], - [ - "How many images are there in the latest 2022 Lego " - ], - [ - "The attached image contains a Python script. 
Run t" - ], - [ - "I thought we could try a fun word puzzle together " - ], - [ - "On ScienceDirect, what is the difference to 3 deci" - ], - [ - "What is the final numeric output from the attached" - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "How many more blocks (also denoted as layers) in B" - ], - [ - "The longest-lived vertebrate is named after an isl" - ], - [ - "On the DeepFruits fruit detection graph on Connect" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "The attached PDF lists accommodations in the resor" - ], - [ - "This spreadsheet contains a list of clients for a " - ], - [ - "How many times was a Twitter/X post cited as a ref" - ], - [ - "During the first week of August 2015, one of the N" - ], - [ - "What is the surname of the equine veterinarian men" - ], - [ - "The YouTube channel Game Grumps began a Letโ€™s Play" - ], - [ - "What is the last word before the second chorus of " - ], - [ - "Who did the actor who played Ray in the Polish-lan" - ], - [ - "I have the Standard plan in the image below, and I" - ], - [ - "In the endnote found in the second-to-last paragra" - ], - [ - "The book with the doi 10.1353/book.24372 concerns " - ], - [ - "Pull out the sentence in the following 5x7 block o" - ], - [ - "What is the latest chronological year date written" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - "Eva Draconis has a personal website which can be a" - ], - [ - "How many at bats did the Yankee with the most walk" - ], - [ - "According to Girls Who Code, how long did it take " - ], - [ - "The attached spreadsheet contains a list of books " - ], - [ - "How many pages if the 2023 IPCC report (85 pages v" - ], - [ - "It's May 2023, and I'm about to drive across the U" - ], - [ - "In Audre Lordeโ€™s poem โ€œFather Son and Holy Ghostโ€," - ], - [ - "On Cornell Law School website's legal information " - ], - [ - "How many edits were made to the Wikipedia page on " 
- ], - [ - "Consider the following symbols: ๐’œ ๐’๐’š\n\nThis is a n" - ], - [ - "On the BBC Earth YouTube video of the Top 5 Sillie" - ], - [ - "What is the absolute difference in tens of thousan" - ], - [ - "The attached spreadsheet lists the locomotives own" - ], - [ - "The attached file lists the locomotives owned by a" - ], - [ - "Iโ€™m thinking about selling my home, so I want to l" - ], - [ - "When was a picture of St. Thomas Aquinas first add" - ], - [ - "As of August 2023, who is the only winner of the U" - ], - [ - "Take the gender split from the 2011 Bulgarian cens" - ], - [ - "All of the individuals who formally held the posit" - ], - [ - "Hi, I was out sick from my classes on Friday, so I" - ], - [ - "If this whole pint is made up of ice cream, how ma" - ], - [ - "Which of the fruits shown in the 2008 painting \"Em" - ], - [ - "What country had the least number of athletes at t" - ], - [ - "In the YouTube 360 VR video from March 2018 narrat" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "Where were the Vietnamese specimens described by K" - ], - [ - "The cover of the August 2021 issue of Vogue shows " - ], - [ - "I'd like to learn more about some popular reality " - ], - [ - "I read a paper about multiwavelength observations " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Given $x_0 = -5$ and $f(x) = x^3 + 4x^2 - 3x + 8$," - ], - [ - "A standard Rubikโ€™s cube has been broken into cubes" - ], - [ - "According to the USGS, in what year was the Americ" - ], - [ - "The attached Excel file contains the sales of menu" - ], - [ - "I'm curious about how much information is availabl" - ], - [ - "What percentage of the total penguin population ac" - ], - [ - "As of May 2023, how many stops are between South S" - ], - [ - "According to Openreview.net, at the NeurIPS 2022 C" - ], - [ - "Of the cities within the United States where U.S. 
" - ], - [ - "Who are the pitchers with the number before and af" - ], - [ - "In the 2015 Metropolitan Museum of Art exhibition " - ], - [ - "On June 6, 2023, an article by Carolyn Collins Pet" - ], - [ - "What is the area of the green polygon in the attac" - ], - [ - "What is the first name of the only Malko Competiti" - ], - [ - "The brand that makes these harnesses the dogs are " - ], - [ - "The year is 2022. I am at the National Air and Spa" - ], - [ - "What was the actual enrollment count of the clinic" - ], - [ - "What was the complete title of the book in which t" - ], - [ - "Bob was invited to participate in a game show, and" - ], - [ - "In NASA's Astronomy Picture of the Day on 2006 Jan" - ], - [ - "At the two-minute mark in the YouTube video upload" - ], - [ - "In the film Goldfinger, what color was the object " - ], - [ - "A 5-man group made up of one tank, one healer, and" - ] - ], - "hovertemplate": "agent_name=code_o3-mini_03_february_remove-navigational
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_o3-mini_03_february_remove-navigational", - "line": { - "color": "#636efa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_o3-mini_03_february_remove-navigational", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAABAAIAAwAEAAUABgAHAAgACQAKAAsADAANAA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAfACAAIQAiACMAJAAlACYAJwAoACkAKgArACwALQAuAC8AMAAxADIAMwA0ADUANgA3ADgAOQA6ADsAPAA9AD4APwBAAEEAQgBDAEQARQBGAEcASABJAEoASwBMAE0ATgBPAFAAUQBSAFMAVABVAFYAVwBYAFkAWgBbAFwAXQBeAF8AYABhAGIAYwBkAGUAZgBnAGgAaQBqAGsAbABtAG4AbwBwAHEAcgBzAHQAdQB2AHcAeAB5AHoAewB8AH0AfgB/AIAAgQCCAIMAhACFAIYAhwCIAIkAigCLAIwAjQCOAI8AkACRAJIAkwCUAJUAlgCXAJgAmQCaAJsAnACdAJ4AnwCgAKEAogCjAKQA", - "dtype": "i2" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnJP1VVVVVVVcU/kiRJkiRJwj8AAAAAAADAPxzHcRzHccw/mpmZmZmZyT9GF1100UXHPwAAAAAAANA/FDuxEzux0z+SJEmSJEnSP1VVVVVVVdU/AAAAAAAA1D/T0tLS0tLSP3Icx3Ecx9E/XkN5DeU11D9mZmZmZmbWP1VVVVVVVdU/RhdddNFF1z9kIQtZyELWP1VVVVVVVdU/exSuR+F61D8UO7ETO7HTP2gvob2E9tI/kiRJkiRJ0j+WexphuafRPzMzMzMzM9M/lVJKKaWU0j8AAAAAAADSP2WTTTbZZNM/09LS0tLS0j+SJEmSJEnSP3Icx3Ecx9E/whT5rBtM0T9sKK+hvIbSP9IgDdIgDdI/mpmZmZmZ0T+7ErUrUbvSP5IkSZIkSdI/1pQ1ZU1Z0z9ddNFFF13UP5Q+6ZM+6dM/OL3pTW960z9MriAmVxDTP6uqqqqqqtI/kiRJkiRJ0j8zMzMzMzPTP9PS0tLS0tI/FDuxEzux0z/BeCv7HFLTP19CewntJdQ/yFOCPCXI0z/btm3btm3TP2cxncV0FtM/Ccs9jbDc0z/vy2MrgobTPzMzMzMzM9M/JkOwjjbv0z+llFJKKaXUP1VVVVVVVdU/AAAAAAAA1T+WWqmVWqnVP1VVVVVVVdU/F341JtID1T+mpaWlpaXVP1VVVVVVVdU/Fl/xFV/x1T9ItMLmQKLVP1VVVVVVVdU/r169evXq1T/yWTeYIp/VP1VVVVVVVdU/2FBeQ3kN1T/sHVMDe8fUP1VVVVVVVdU/ICod2TMQ1T/NzMzMzMzUPwaebs0Pi9Q/S9SuRO1K1D/6tYdcVgzUPyVJkiRJktQ/VFRUVFRU1D8GfUFf0BfUPwnLPY2w3NM/o4suuuii0z83talNbWrTP5Q+6ZM+6dM/VEZlVEZl1D9DFrKQhSzUP6WUUkoppdQ/Ut/ZqO9s1D8mTv2eW+LUP1VVVVVVVdU/Ffji6gcd1T85BS+n4OXUP1VVVVVVVdU/H4XrUbge1T/L8I0oMOnUP7W0tLS0tNQ/k/OyiE8g1T/FTuzETuzUP5VLuZRLudQ/E4y3ss8h1T9RGh+ZQO/UP1VVVVVVVdU/NFIxtzoj1T+HtW
9Y+4bVP1VVVVVVVdU/SZIkSZIk1T/DSk8trPTUP1pMZzGdxdQ/SeXDuF+X1D/mnkZY7mnUP9RDPdRDPdQ/J3VfHlsR1D+U3W+U3W/UP0RERERERNQ/69khcGMZ1D8mQ7CONu/TP0vUrkTtStQ/IYQQQggh1D97FK5H4XrUPxRFURRFUdQ/CoVCoVAo1D8AAAAAAADUP/aEPWFP2NM/FDuxEzux0z+Hae6Cv4rTP+GDDz744NM/qzut7rS60z9+NSbSA5XTP/42xajhb9M/S0tLS0tL0z8xNguqPSfTPzDXDsy1A9M/UfxFzrDg0j8zMzMzMzPTP0yuICZXENM/K2wOJFph0z8UO7ETO7HTPwAAAAAAANQ/Ccs9jbDc0z+hQoUKFSrUPwJl4kr3BtQ/RT7rBlPk0z8M1XTMJcLTP6DTBjptoNM/n65P16fr0z+ivIbyGsrTP1T+qFP+qNM/zspPiLPy0z/SExw9wdHTPxQ7sRM7sdM/Qbgb+x6R0z/jJszvanHTP8F4K/scUtM/MzMzMzMz0z9Wigm6qxTTP2gvob2E9tI/n6lcd7zY0j+7ErUrUbvSP54S5ClBntI/", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "customdata": [ - [ - "A paper about AI regulation that was originally su" - ], - [ - "If we assume all articles published by Nature in 2" - ], - [ - "In Unlambda, what exact charcter or text needs to " - ], - [ - "Iโ€™m researching species that became invasive after" - ], - [ - "The attached spreadsheet shows the inventory for a" - ], - [ - "How many studio albums were published by Mercedes " - ], - [ - "If Eliud Kipchoge could maintain his record-making" - ], - [ - "The object in the British Museum's collection with" - ], - [ - "According to github, when was Regression added to " - ], - [ - "Here's a fun riddle that I think you'll enjoy.\n\nYo" - ], - [ - "Using the Biopython library in Python, parse the P" - ], - [ - "What are the EC numbers of the two most commonly u" - ], - [ - "In July 2, 1959 United States standards for grades" - ], - [ - "In April of 1977, who was the Prime Minister of th" - ], - [ - "Use density measures from the chemistry materials " - ], - [ - "What was the volume in m^3 of the fish bag that wa" - ], - [ - "What is the average number of pre-2020 works on th" - ], - [ - "In the video https://www.youtube.com/watch?v=L1vXC" - ], - [ - "Of the authors (First M. 
Last) that worked on the " - ], - [ - "When you take the average of the standard populati" - ], - [ - "Assuming scientists in the famous youtube video Th" - ], - [ - "In Series 9, Episode 11 of Doctor Who, the Doctor " - ], - [ - "In terms of geographical distance between capital " - ], - [ - "In the NCATS PubChem compound database for Food Ad" - ], - [ - "I need to fact-check a citation. This is the citat" - ], - [ - "Which contributor to the version of OpenCV where s" - ], - [ - "What integer-rounded percentage of the total lengt" - ], - [ - "An office held a Secret Santa gift exchange where " - ], - [ - "What is the maximum length in meters of #9 in the " - ], - [ - "What two-word type of model did Manash Pratim Kash" - ], - [ - "What animals that were mentioned in both Ilias Lag" - ], - [ - "How many High Energy Physics - Lattice articles li" - ], - [ - "The photograph in the Whitney Museum of American A" - ], - [ - ".rewsna eht sa \"tfel\" drow eht fo etisoppo eht eti" - ], - [ - "What is the minimum number of page links a person " - ], - [ - "Each cell in the attached spreadsheet represents a" - ], - [ - "Which of the text elements under CATEGORIES in the" - ], - [ - "I went to Virtue restaurant & bar in Chicago for m" - ], - [ - "ยฌ(A โˆง B) โ†” (ยฌA โˆจ ยฌB)\nยฌ(A โˆจ B) โ†” (ยฌA โˆง ยฌB)\n(A โ†’ B) " - ], - [ - "My family reunion is this week, and I was assigned" - ], - [ - "In Emily Midkiff's June 2014 article in a journal " - ], - [ - "It is 1999. Before you party like it is 1999, plea" - ], - [ - "Under DDC 633 on Bielefeld University Library's BA" - ] - ], - "hovertemplate": "agent_name=code_qwen-coder-32B_03_february_text
index=%{x}
is_correct=%{y}
question=%{customdata[0]}", - "legendgroup": "code_qwen-coder-32B_03_february_text", - "line": { - "color": "#EF553B", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "code_qwen-coder-32B_03_february_text", - "showlegend": true, - "type": "scattergl", - "x": { - "bdata": "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKg==", - "dtype": "i1" - }, - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAADgP1VVVVVVVdU/AAAAAAAA0D+amZmZmZnZP1VVVVVVVdU/kiRJkiRJ0j8AAAAAAADQPxzHcRzHccw/mpmZmZmZyT9GF1100UXHP1VVVVVVVcU/FDuxEzuxwz+SJEmSJEnCP5qZmZmZmck/AAAAAAAA0D8eHh4eHh7OPxzHcRzHccw/KK+hvIbyyj+amZmZmZnJP57neZ7nec4/F1100UUXzT+96U1vetPLP6uqqqqqqso/mpmZmZmZyT/ZiZ3YiZ3IP0J7Ce0ltMc/t23btm3bxj98GmG5pxHGP1VVVVVVVcU/pZRSSimlxD8AAAAAAADEP2WTTTbZZMM/l5aWlpaWxj8WX/EVX/HFPzmO4ziO48g/doMp8lk3yD9DeQ3lNZTHPxqkQRqkQco/zczMzMzMzD8ZnI/B+RjMP9u2bdu2bcs/s6asKWvKyj8=", - "dtype": "f8" - }, - "yaxis": "y" - } - ], - "layout": { - "legend": { - "title": { - "text": "agent_name" - }, - "tracegroupgap": 0 - }, - "margin": { - "t": 60 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, 
- "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], 
- [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - 
"colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - 
], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - 
"ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "index" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "is_correct" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import plotly.express as px\n", "\n", @@ -10788,9 +245,6 @@ "\n", "\n", "cumulative_df[\"question\"] = cumulative_df.apply(find_question, axis=1)\n", - "# cumulative_df[\"question\"] = [el[:50] for el in sel_df[\"question\"].values]\n", - "\n", - "# cumulative_df[\"is_correct\"] = cumulative_df[\"is_correct\"] * (165 - 68) / 165\n", "\n", "px.line(\n", " cumulative_df,\n", @@ -10810,19 +264,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "165\n" - ] - } - ], + "outputs": [], "source": [ - "sel_df = result_df.loc[result_df[\"agent_name\"] == o1]\n", + "sel_df = result_df.loc[result_df[\"agent_name\"] == \"o1\"]\n", "print(len(sel_df))" ] }, @@ -10835,56 +281,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n", - "\n", - "\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - 
"\n", - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n", - "\n", - "\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - "\n", - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n", - "\n", - "\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - "\n", - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:10: SettingWithCopyWarning:\n", - "\n", - "\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - "\n", - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/2022001392.py:11: SettingWithCopyWarning:\n", - "\n", - "\n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -10916,890 +315,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - 
"plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "is_correct=False
variable=%{x}
Average count=%{y}", - "legendgroup": "False", - "marker": { - "color": "#636efa", - "pattern": { - "shape": "" - } - }, - "name": "False", - "orientation": "v", - "showlegend": true, - "textposition": "outside", - "type": "bar", - "x": [ - "AgentParsingError", - "AgentExecutionError", - "AgentMaxIterationsError", - "AgentGenerationError", - "Count steps" - ], - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACSJEmSJEkMQA==", - "dtype": "f8" - }, - "yaxis": "y" - }, - { - "hovertemplate": "is_correct=True
variable=%{x}
Average count=%{y}", - "legendgroup": "True", - "marker": { - "color": "#EF553B", - "pattern": { - "shape": "" - } - }, - "name": "True", - "orientation": "v", - "showlegend": true, - "textposition": "outside", - "type": "bar", - "x": [ - "AgentParsingError", - "AgentExecutionError", - "AgentMaxIterationsError", - "AgentGenerationError", - "Count steps" - ], - "xaxis": "x", - "y": { - "bdata": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABPt+aHRcoIQA==", - "dtype": "f8" - }, - "yaxis": "y" - } - ], - "layout": { - "bargroupgap": 0, - "barmode": "group", - "height": 500, - "legend": { - "title": { - "text": "is_correct" - }, - "tracegroupgap": 0 - }, - "margin": { - "t": 60 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 
0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - 
], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermap": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermap" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - 
], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - 
"#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "width": 800, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - 
"title": { - "text": "variable" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Average count" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import plotly.express as px\n", "\n", @@ -11841,153 +359,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
is_correctcount_stepsquestion
attachment_type
None0.4237994.9597252185
csv0.0000007.75000016
docx0.5714294.90476221
jpg0.1428575.75000028
jsonld0.0000006.60000015
mp30.4800004.50000050
pdb0.0000004.44444418
pdf0.5882354.13725551
png0.2167834.412587143
pptx0.8823534.05882417
py1.0000004.26666715
txt0.7058824.76470617
xlsx0.6127454.823529204
zip0.4482765.34482829
\n", - "
" - ], - "text/plain": [ - " is_correct count_steps question\n", - "attachment_type \n", - "None 0.423799 4.959725 2185\n", - "csv 0.000000 7.750000 16\n", - "docx 0.571429 4.904762 21\n", - "jpg 0.142857 5.750000 28\n", - "jsonld 0.000000 6.600000 15\n", - "mp3 0.480000 4.500000 50\n", - "pdb 0.000000 4.444444 18\n", - "pdf 0.588235 4.137255 51\n", - "png 0.216783 4.412587 143\n", - "pptx 0.882353 4.058824 17\n", - "py 1.000000 4.266667 15\n", - "txt 0.705882 4.764706 17\n", - "xlsx 0.612745 4.823529 204\n", - "zip 0.448276 5.344828 29" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(\n", " result_df.groupby([\"attachment_type\"])[[\"is_correct\", \"count_steps\", \"question\"]].agg(\n", @@ -12005,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12015,52 +389,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "agent_name\n", - "code_gpt4o_03_february_goodoldtext-unbroken 38.36\n", - "code_gpt4o_03_february_magenticbrowser 35.22\n", - "code_gpt4o_03_february_magenticbrowser2 36.54\n", - "code_gpt4o_03_february_text 37.58\n", - "code_o1_01_february_text 49.09\n", - "code_o1_03_february_ablation-toolcalling-manager 32.73\n", - "code_o1_03_february_fix-print-outputs 51.83\n", - "code_o1_03_february_fix-print-outputs2 55.77\n", - "code_o1_03_february_goodoldtext-unbroken 53.42\n", - "code_o1_03_february_remove-navigational 53.66\n", - "code_o1_03_february_text_high-reasoning-effort 48.48\n", - "code_o1_04_february_submission 49.38\n", - "code_o1_04_february_submission5 55.15\n", - "code_o3-mini_03_february_remove-navigational 29.09\n", - "Name: is_correct, dtype: float64" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Majority score: 58.18\n", - 
"Oracle score: 72.73\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/ipykernel_25011/3287428472.py:20: DeprecationWarning:\n", - "\n", - "DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "def majority_vote(df):\n", " df = df[(df[\"prediction\"] != \"Unable to determine\") & (~df[\"prediction\"].isna()) & (df[\"prediction\"] != \"None\")]\n", @@ -12100,7 +431,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12112,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12129,9 +460,9 @@ ], "metadata": { "kernelspec": { - "display_name": "gaia", + "display_name": "test", "language": "python", - "name": "python3" + "name": "test" }, "language_info": { "codemirror_mode": { diff --git a/examples/open_deep_research/app.py b/examples/open_deep_research/app.py new file mode 100644 index 000000000..a7f884faa --- /dev/null +++ b/examples/open_deep_research/app.py @@ -0,0 +1,11 @@ +from run import create_agent + +from smolagents.gradio_ui import GradioUI + + +agent = create_agent() + +demo = GradioUI(agent) + +if __name__ == "__main__": + demo.launch() diff --git a/examples/open_deep_research/requirements.txt b/examples/open_deep_research/requirements.txt index a18936ae4..4fe0e0e2a 100644 --- a/examples/open_deep_research/requirements.txt +++ b/examples/open_deep_research/requirements.txt @@ -1,4 +1,5 @@ anthropic>=0.37.1 +audioop-lts<1.0; python_version >= "3.13" # required to use pydub in Python >=3.13; LTS port of the 
removed Python builtin module audioop beautifulsoup4>=4.12.3 datasets>=2.21.0 google_search_results>=2.4.2 diff --git a/examples/open_deep_research/run.py b/examples/open_deep_research/run.py index 2dcddab4f..be1ad38a5 100644 --- a/examples/open_deep_research/run.py +++ b/examples/open_deep_research/run.py @@ -11,7 +11,6 @@ FindNextTool, PageDownTool, PageUpTool, - SearchInformationTool, SimpleTextBrowser, VisitTool, ) @@ -19,38 +18,13 @@ from smolagents import ( CodeAgent, - # HfApiModel, + GoogleSearchTool, + # InferenceClientModel, LiteLLMModel, ToolCallingAgent, ) -AUTHORIZED_IMPORTS = [ - "requests", - "zipfile", - "os", - "pandas", - "numpy", - "sympy", - "json", - "bs4", - "pubchempy", - "xml", - "yahoo_finance", - "Bio", - "sklearn", - "scipy", - "pydub", - "io", - "PIL", - "chess", - "PyPDF2", - "pptx", - "torch", - "datetime", - "fractions", - "csv", -] load_dotenv(override=True) login(os.getenv("HF_TOKEN")) @@ -83,22 +57,20 @@ def parse_args(): os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True) -def main(): - args = parse_args() - text_limit = 100000 - - model = LiteLLMModel( - args.model_id, - custom_role_conversions=custom_role_conversions, - max_completion_tokens=8192, - reasoning_effort="high", - ) - document_inspection_tool = TextInspectorTool(model, text_limit) +def create_agent(model_id="o1"): + model_params = { + "model_id": model_id, + "custom_role_conversions": custom_role_conversions, + "max_completion_tokens": 8192, + } + if model_id == "o1": + model_params["reasoning_effort"] = "high" + model = LiteLLMModel(**model_params) + text_limit = 100000 browser = SimpleTextBrowser(**BROWSER_CONFIG) - WEB_TOOLS = [ - SearchInformationTool(browser), + GoogleSearchTool(provider="serper"), VisitTool(browser), PageUpTool(browser), PageDownTool(browser), @@ -107,7 +79,6 @@ def main(): ArchiveSearchTool(browser), TextInspectorTool(model, text_limit), ] - text_webbrowser_agent = ToolCallingAgent( model=model, tools=WEB_TOOLS, @@ -129,15 
+100,23 @@ def main(): manager_agent = CodeAgent( model=model, - tools=[visualizer, document_inspection_tool], + tools=[visualizer, TextInspectorTool(model, text_limit)], max_steps=12, verbosity_level=2, - additional_authorized_imports=AUTHORIZED_IMPORTS, + additional_authorized_imports=["*"], planning_interval=4, managed_agents=[text_webbrowser_agent], ) - answer = manager_agent.run(args.question) + return manager_agent + + +def main(): + args = parse_args() + + agent = create_agent(model_id=args.model_id) + + answer = agent.run(args.question) print(f"Got this answer: {answer}") diff --git a/examples/open_deep_research/run_gaia.py b/examples/open_deep_research/run_gaia.py index fa59fc03e..192081787 100644 --- a/examples/open_deep_research/run_gaia.py +++ b/examples/open_deep_research/run_gaia.py @@ -1,3 +1,4 @@ +# EXAMPLE COMMAND: python examples/open_deep_research/run_gaia.py --concurrency 32 --run-name generate-traces-03-apr-noplanning --model-id gpt-4o import argparse import json import os @@ -5,7 +6,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from pathlib import Path -from typing import List import datasets import pandas as pd @@ -23,7 +23,6 @@ FindNextTool, PageDownTool, PageUpTool, - SearchInformationTool, SimpleTextBrowser, VisitTool, ) @@ -32,39 +31,13 @@ from smolagents import ( CodeAgent, - # HfApiModel, + GoogleSearchTool, LiteLLMModel, Model, ToolCallingAgent, ) -AUTHORIZED_IMPORTS = [ - "requests", - "zipfile", - "os", - "pandas", - "numpy", - "sympy", - "json", - "bs4", - "pubchempy", - "xml", - "yahoo_finance", - "Bio", - "sklearn", - "scipy", - "pydub", - "io", - "PIL", - "chess", - "PyPDF2", - "pptx", - "torch", - "datetime", - "fractions", - "csv", -] load_dotenv(override=True) login(os.getenv("HF_TOKEN")) @@ -121,14 +94,14 @@ def preprocess_file_paths(row): os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True) -def create_agent_hierarchy(model: Model): +def 
create_agent_team(model: Model): text_limit = 100000 ti_tool = TextInspectorTool(model, text_limit) browser = SimpleTextBrowser(**BROWSER_CONFIG) WEB_TOOLS = [ - SearchInformationTool(browser), + GoogleSearchTool(provider="serper"), VisitTool(browser), PageUpTool(browser), PageDownTool(browser), @@ -137,6 +110,7 @@ def create_agent_hierarchy(model: Model): ArchiveSearchTool(browser), TextInspectorTool(model, text_limit), ] + text_webbrowser_agent = ToolCallingAgent( model=model, tools=WEB_TOOLS, @@ -161,7 +135,7 @@ def create_agent_hierarchy(model: Model): tools=[visualizer, ti_tool], max_steps=12, verbosity_level=2, - additional_authorized_imports=AUTHORIZED_IMPORTS, + additional_authorized_imports=["*"], planning_interval=4, managed_agents=[text_webbrowser_agent], ) @@ -178,21 +152,20 @@ def append_answer(entry: dict, jsonl_file: str) -> None: def answer_single_question(example, model_id, answers_file, visual_inspection_tool): - model = LiteLLMModel( - model_id, - custom_role_conversions=custom_role_conversions, - max_completion_tokens=8192, - reasoning_effort="high", - ) - # model = HfApiModel("Qwen/Qwen2.5-72B-Instruct", provider="together") - # "https://lnxyuvj02bpe6mam.us-east-1.aws.endpoints.huggingface.cloud", - # custom_role_conversions=custom_role_conversions, - # # provider="sambanova", - # max_tokens=8096, - # ) + model_params = { + "model_id": model_id, + "custom_role_conversions": custom_role_conversions, + } + if model_id == "o1": + model_params["reasoning_effort"] = "high" + model_params["max_completion_tokens"] = 8192 + else: + model_params["max_tokens"] = 4096 + model = LiteLLMModel(**model_params) + # model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=4096) document_inspection_tool = TextInspectorTool(model, 100000) - agent = create_agent_hierarchy(model) + agent = create_agent_team(model) augmented_question = """You have one question to answer. 
It is paramount that you provide a correct answer. Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded. @@ -218,14 +191,14 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to # Run agent ๐Ÿš€ final_result = agent.run(augmented_question) - agent_memory = agent.write_memory_to_messages(summary_mode=True) + agent_memory = agent.write_memory_to_messages() final_result = prepare_response(augmented_question, agent_memory, reformulation_model=model) output = str(final_result) for memory_step in agent.memory.steps: memory_step.model_input_messages = None - intermediate_steps = [str(step) for step in agent.memory.steps] + intermediate_steps = agent_memory # Check for parsing errors which indicate the LLM failed to follow the required format parsing_error = True if any(["AgentParsingError" in step for step in intermediate_steps]) else False @@ -243,6 +216,12 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to exception = e raised_exception = True end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + token_counts_manager = agent.monitor.get_total_token_counts() + token_counts_web = list(agent.managed_agents.values())[0].monitor.get_total_token_counts() + total_token_counts = { + "input": token_counts_manager["input"] + token_counts_web["input"], + "output": token_counts_manager["output"] + token_counts_web["output"], + } annotated_example = { "agent_name": model.model_id, "question": example["question"], @@ -252,16 +231,17 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to "parsing_error": parsing_error, "iteration_limit_exceeded": iteration_limit_exceeded, "agent_error": str(exception) if raised_exception else None, - "start_time": start_time, - "end_time": end_time, "task": 
example["task"], "task_id": example["task_id"], "true_answer": example["true_answer"], + "start_time": start_time, + "end_time": end_time, + "token_counts": total_token_counts, } append_answer(annotated_example, answers_file) -def get_examples_to_answer(answers_file, eval_ds) -> List[dict]: +def get_examples_to_answer(answers_file, eval_ds) -> list[dict]: print(f"Loading answers from {answers_file}...") try: done_questions = pd.read_json(answers_file, lines=True)["question"].tolist() diff --git a/examples/open_deep_research/scripts/mdconvert.py b/examples/open_deep_research/scripts/mdconvert.py index 68f13a28b..939cd121a 100644 --- a/examples/open_deep_research/scripts/mdconvert.py +++ b/examples/open_deep_research/scripts/mdconvert.py @@ -14,7 +14,7 @@ import tempfile import traceback import zipfile -from typing import Any, Dict, List, Optional, Union +from typing import Any from urllib.parse import parse_qs, quote, unquote, urlparse, urlunparse import mammoth @@ -112,22 +112,22 @@ def convert_soup(self, soup: Any) -> str: class DocumentConverterResult: """The result of converting a document to text.""" - def __init__(self, title: Union[str, None] = None, text_content: str = ""): - self.title: Union[str, None] = title + def __init__(self, title: str | None = None, text_content: str = ""): + self.title: str | None = title self.text_content: str = text_content class DocumentConverter: """Abstract superclass of all DocumentConverters.""" - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: raise NotImplementedError() class PlainTextConverter(DocumentConverter): """Anything with content type text/plain""" - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: # Guess the content type from any file extension that might be 
around content_type, _ = mimetypes.guess_type("__placeholder" + kwargs.get("file_extension", "")) @@ -149,7 +149,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert class HtmlConverter(DocumentConverter): """Anything with content type text/html""" - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: # Bail if not html extension = kwargs.get("file_extension", "") if extension.lower() not in [".html", ".htm"]: @@ -161,7 +161,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert return result - def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]: + def _convert(self, html_content: str) -> None | DocumentConverterResult: """Helper function that converts and HTML string.""" # Parse the string @@ -189,7 +189,7 @@ def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]: class WikipediaConverter(DocumentConverter): """Handle Wikipedia pages separately, focusing only on the main document content.""" - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: # Bail if not Wikipedia extension = kwargs.get("file_extension", "") if extension.lower() not in [".html", ".htm"]: @@ -234,7 +234,7 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert class YouTubeConverter(DocumentConverter): """Handle YouTube specially, focusing on the video title, description, and transcript.""" - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: # Bail if not YouTube extension = kwargs.get("file_extension", "") if extension.lower() not in [".html", ".htm"]: @@ -250,7 +250,7 @@ def 
convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert # Read the meta tags assert soup.title is not None and soup.title.string is not None - metadata: Dict[str, str] = {"title": soup.title.string} + metadata: dict[str, str] = {"title": soup.title.string} for meta in soup(["meta"]): for a in meta.attrs: if a in ["itemprop", "property", "name"]: @@ -328,13 +328,13 @@ def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConvert text_content=webpage_text, ) - def _get(self, metadata: Dict[str, str], keys: List[str], default: Union[str, None] = None) -> Union[str, None]: + def _get(self, metadata: dict[str, str], keys: list[str], default: str | None = None) -> str | None: for k in keys: if k in metadata: return metadata[k] return default - def _findKey(self, json: Any, key: str) -> Union[str, None]: # TODO: Fix json type + def _findKey(self, json: Any, key: str) -> str | None: # TODO: Fix json type if isinstance(json, list): for elm in json: ret = self._findKey(elm, key) @@ -356,7 +356,7 @@ class PdfConverter(DocumentConverter): Converts PDFs to Markdown. Most style information is ignored, so the results are essentially plain-text. """ - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a PDF extension = kwargs.get("file_extension", "") if extension.lower() != ".pdf": @@ -373,7 +373,7 @@ class DocxConverter(HtmlConverter): Converts DOCX files to Markdown. Style information (e.g.m headings) and tables are preserved where possible. 
""" - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a DOCX extension = kwargs.get("file_extension", "") if extension.lower() != ".docx": @@ -393,7 +393,7 @@ class XlsxConverter(HtmlConverter): Converts XLSX files to Markdown, with each sheet presented as a separate Markdown table. """ - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a XLSX extension = kwargs.get("file_extension", "") if extension.lower() not in [".xlsx", ".xls"]: @@ -417,7 +417,7 @@ class PptxConverter(HtmlConverter): Converts PPTX files to Markdown. Supports heading, tables and images with alt text. """ - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a PPTX extension = kwargs.get("file_extension", "") if extension.lower() != ".pptx": @@ -520,7 +520,7 @@ class WavConverter(MediaConverter): Converts WAV files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed). """ - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a XLSX extension = kwargs.get("file_extension", "") if extension.lower() != ".wav": @@ -570,7 +570,7 @@ class Mp3Converter(WavConverter): Converts MP3 and M4A files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` AND `pydub` are installed). 
""" - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a MP3 extension = kwargs.get("file_extension", "") if extension.lower() not in [".mp3", ".m4a"]: @@ -644,7 +644,7 @@ def __init__(self, extract_dir: str = "downloads"): # Create the extraction directory if it doesn't exist os.makedirs(self.extract_dir, exist_ok=True) - def convert(self, local_path: str, **kwargs: Any) -> Union[None, DocumentConverterResult]: + def convert(self, local_path: str, **kwargs: Any) -> None | DocumentConverterResult: # Bail if not a ZIP file extension = kwargs.get("file_extension", "") if extension.lower() != ".zip": @@ -681,7 +681,7 @@ class ImageConverter(MediaConverter): Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an mlm_client is configured). """ - def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: + def convert(self, local_path, **kwargs) -> None | DocumentConverterResult: # Bail if not a XLSX extension = kwargs.get("file_extension", "") if extension.lower() not in [".jpg", ".jpeg", ".png"]: @@ -771,9 +771,9 @@ class MarkdownConverter: def __init__( self, - requests_session: Optional[requests.Session] = None, - mlm_client: Optional[Any] = None, - mlm_model: Optional[Any] = None, + requests_session: requests.Session | None = None, + mlm_client: Any | None = None, + mlm_model: Any | None = None, ): if requests_session is None: self._requests_session = requests.Session() @@ -783,7 +783,7 @@ def __init__( self._mlm_client = mlm_client self._mlm_model = mlm_model - self._page_converters: List[DocumentConverter] = [] + self._page_converters: list[DocumentConverter] = [] # Register converters for successful browsing operations # Later registrations are tried first / take higher priority than earlier registrations @@ -802,7 
+802,7 @@ def __init__( self.register_page_converter(PdfConverter()) def convert( - self, source: Union[str, requests.Response], **kwargs: Any + self, source: str | requests.Response, **kwargs: Any ) -> DocumentConverterResult: # TODO: deal with kwargs """ Args: @@ -924,7 +924,7 @@ def convert_response( return result - def _convert(self, local_path: str, extensions: List[Union[str, None]], **kwargs) -> DocumentConverterResult: + def _convert(self, local_path: str, extensions: list[str | None], **kwargs) -> DocumentConverterResult: error_trace = "" for ext in extensions + [None]: # Try last with no extension for converter in self._page_converters: diff --git a/examples/open_deep_research/scripts/text_inspector_tool.py b/examples/open_deep_research/scripts/text_inspector_tool.py index 056168cee..2b1e18ae5 100644 --- a/examples/open_deep_research/scripts/text_inspector_tool.py +++ b/examples/open_deep_research/scripts/text_inspector_tool.py @@ -1,9 +1,5 @@ -from typing import Optional - from smolagents import Tool -from smolagents.models import MessageRole, Model - -from .mdconvert import MarkdownConverter +from smolagents.models import Model class TextInspectorTool(Tool): @@ -24,14 +20,18 @@ class TextInspectorTool(Tool): }, } output_type = "string" - md_converter = MarkdownConverter() - def __init__(self, model: Model, text_limit: int): + def __init__(self, model: Model = None, text_limit: int = 100000): super().__init__() self.model = model self.text_limit = text_limit + from .mdconvert import MarkdownConverter + + self.md_converter = MarkdownConverter() def forward_initial_exam_mode(self, file_path, question): + from smolagents.models import MessageRole + result = self.md_converter.convert(file_path) if file_path[-4:] in [".png", ".jpg"]: @@ -73,7 +73,9 @@ def forward_initial_exam_mode(self, file_path, question): ] return self.model(messages).content - def forward(self, file_path, question: Optional[str] = None) -> str: + def forward(self, file_path, question: str 
| None = None) -> str: + from smolagents.models import MessageRole + result = self.md_converter.convert(file_path) if file_path[-4:] in [".png", ".jpg"]: diff --git a/examples/open_deep_research/scripts/text_web_browser.py b/examples/open_deep_research/scripts/text_web_browser.py index ef40f8551..044128edb 100644 --- a/examples/open_deep_research/scripts/text_web_browser.py +++ b/examples/open_deep_research/scripts/text_web_browser.py @@ -6,7 +6,7 @@ import re import time import uuid -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any from urllib.parse import unquote, urljoin, urlparse import pathvalidate @@ -24,19 +24,19 @@ class SimpleTextBrowser: def __init__( self, - start_page: Optional[str] = None, - viewport_size: Optional[int] = 1024 * 8, - downloads_folder: Optional[Union[str, None]] = None, - serpapi_key: Optional[Union[str, None]] = None, - request_kwargs: Optional[Union[Dict[str, Any], None]] = None, + start_page: str | None = None, + viewport_size: int | None = 1024 * 8, + downloads_folder: str | None | None = None, + serpapi_key: str | None | None = None, + request_kwargs: dict[str, Any] | None | None = None, ): self.start_page: str = start_page if start_page else "about:blank" self.viewport_size = viewport_size # Applies only to the standard uri types self.downloads_folder = downloads_folder - self.history: List[Tuple[str, float]] = list() - self.page_title: Optional[str] = None + self.history: list[tuple[str, float]] = list() + self.page_title: str | None = None self.viewport_current_page = 0 - self.viewport_pages: List[Tuple[int, int]] = list() + self.viewport_pages: list[tuple[int, int]] = list() self.set_address(self.start_page) self.serpapi_key = serpapi_key self.request_kwargs = request_kwargs @@ -44,15 +44,15 @@ def __init__( self._mdconvert = MarkdownConverter() self._page_content: str = "" - self._find_on_page_query: Union[str, None] = None - self._find_on_page_last_result: Union[int, None] = None # Location 
of the last result + self._find_on_page_query: str | None = None + self._find_on_page_last_result: int | None = None # Location of the last result @property def address(self) -> str: """Return the address of the current page.""" return self.history[-1][0] - def set_address(self, uri_or_path: str, filter_year: Optional[int] = None) -> None: + def set_address(self, uri_or_path: str, filter_year: int | None = None) -> None: # TODO: Handle anchors self.history.append((uri_or_path, time.time())) @@ -102,7 +102,7 @@ def page_down(self) -> None: def page_up(self) -> None: self.viewport_current_page = max(self.viewport_current_page - 1, 0) - def find_on_page(self, query: str) -> Union[str, None]: + def find_on_page(self, query: str) -> str | None: """Searches for the query from the current viewport forward, looping back to the start if necessary.""" # Did we get here via a previous find_on_page search with the same query? @@ -121,7 +121,7 @@ def find_on_page(self, query: str) -> Union[str, None]: self._find_on_page_last_result = viewport_match return self.viewport - def find_next(self) -> Union[str, None]: + def find_next(self) -> str | None: """Scroll to the next viewport that matches the query""" if self._find_on_page_query is None: @@ -144,7 +144,7 @@ def find_next(self) -> Union[str, None]: self._find_on_page_last_result = viewport_match return self.viewport - def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int, None]: + def _find_next_viewport(self, query: str, starting_viewport: int) -> int | None: """Search for matches between the starting viewport looping when reaching the end.""" if query is None: @@ -174,7 +174,7 @@ def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int, return None - def visit_page(self, path_or_uri: str, filter_year: Optional[int] = None) -> str: + def visit_page(self, path_or_uri: str, filter_year: int | None = None) -> str: """Update the address, visit the page, and return the content of the 
viewport.""" self.set_address(path_or_uri, filter_year=filter_year) return self.viewport @@ -201,7 +201,7 @@ def _split_pages(self) -> None: self.viewport_pages.append((start_idx, end_idx)) start_idx = end_idx - def _serpapi_search(self, query: str, filter_year: Optional[int] = None) -> None: + def _serpapi_search(self, query: str, filter_year: int | None = None) -> None: if self.serpapi_key is None: raise ValueError("Missing SerpAPI key.") @@ -231,7 +231,7 @@ def _prev_visit(url): return f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n" return "" - web_snippets: List[str] = list() + web_snippets: list[str] = list() idx = 0 if "organic_results" in results: for page in results["organic_results"]: @@ -352,7 +352,7 @@ def _fetch_page(self, url: str) -> None: self.page_title = "Error" self._set_page_content(f"## Error\n\n{str(request_exception)}") - def _state(self) -> Tuple[str, str]: + def _state(self) -> tuple[str, str]: header = f"Address: {self.address}\n" if self.page_title is not None: header += f"Title: {self.page_title}\n" @@ -385,7 +385,7 @@ def __init__(self, browser): super().__init__() self.browser = browser - def forward(self, query: str, filter_year: Optional[int] = None) -> str: + def forward(self, query: str, filter_year: int | None = None) -> str: self.browser.visit_page(f"google: {query}", filter_year=filter_year) header, content = self.browser._state() return header.strip() + "\n=======================\n" + content @@ -397,7 +397,7 @@ class VisitTool(Tool): inputs = {"url": {"type": "string", "description": "The relative or absolute url of the webpage to visit."}} output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser @@ -421,6 +421,8 @@ def __init__(self, browser): self.browser = browser def forward(self, url: str) -> str: + import requests + if "arxiv" in url: url = url.replace("abs", "pdf") response = requests.get(url) @@ 
-452,11 +454,13 @@ class ArchiveSearchTool(Tool): } output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser def forward(self, url, date) -> str: + import requests + no_timestamp_url = f"https://archive.org/wayback/available?url={url}" archive_url = no_timestamp_url + f"×tamp={date}" response = requests.get(archive_url).json() @@ -487,7 +491,7 @@ class PageUpTool(Tool): inputs = {} output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser @@ -505,7 +509,7 @@ class PageDownTool(Tool): inputs = {} output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser @@ -526,7 +530,7 @@ class FinderTool(Tool): } output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser @@ -549,7 +553,7 @@ class FindNextTool(Tool): inputs = {} output_type = "string" - def __init__(self, browser): + def __init__(self, browser=None): super().__init__() self.browser = browser diff --git a/examples/open_deep_research/scripts/visual_qa.py b/examples/open_deep_research/scripts/visual_qa.py index 84d240b66..01d60b30a 100644 --- a/examples/open_deep_research/scripts/visual_qa.py +++ b/examples/open_deep_research/scripts/visual_qa.py @@ -4,23 +4,21 @@ import os import uuid from io import BytesIO -from typing import Optional +import PIL.Image import requests from dotenv import load_dotenv from huggingface_hub import InferenceClient -from PIL import Image -from transformers import AutoProcessor from smolagents import Tool, tool load_dotenv(override=True) -idefics_processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b-chatty") - def process_images_and_text(image_path, query, client): + from transformers import AutoProcessor + messages = [ { "role": "user", @@ -30,7 +28,7 @@ def 
process_images_and_text(image_path, query, client): ], }, ] - + idefics_processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b-chatty") prompt_with_template = idefics_processor.apply_chat_template(messages, add_generation_prompt=True) # load images from local directory @@ -38,7 +36,7 @@ def process_images_and_text(image_path, query, client): # encode images to strings which can be sent to the endpoint def encode_local_image(image_path): # load image - image = Image.open(image_path).convert("RGB") + image = PIL.Image.open(image_path).convert("RGB") # Convert the image to a base64 string buffer = BytesIO() @@ -95,11 +93,8 @@ def encode_image(image_path): return base64.b64encode(image_file.read()).decode("utf-8") -headers = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"} - - def resize_image(image_path): - img = Image.open(image_path) + img = PIL.Image.open(image_path) width, height = img.size img = img.resize((int(width / 2), int(height / 2))) new_image_path = f"resized_{image_path}" @@ -121,7 +116,7 @@ class VisualQATool(Tool): client = InferenceClient("HuggingFaceM4/idefics2-8b-chatty") - def forward(self, image_path: str, question: Optional[str] = None) -> str: + def forward(self, image_path: str, question: str | None = None) -> str: output = "" add_note = False if not question: @@ -144,13 +139,19 @@ def forward(self, image_path: str, question: Optional[str] = None) -> str: @tool -def visualizer(image_path: str, question: Optional[str] = None) -> str: +def visualizer(image_path: str, question: str | None = None) -> str: """A tool that can answer questions about attached images. Args: image_path: The path to the image on which to answer the question. This should be a local path to downloaded image. question: The question to answer. 
""" + import mimetypes + import os + + import requests + + from .visual_qa import encode_image add_note = False if not question: @@ -175,6 +176,7 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str: ], "max_tokens": 1000, } + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}"} response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) try: output = response.json()["choices"][0]["message"]["content"] diff --git a/examples/open_deep_research/visual_vs_text_browser.ipynb b/examples/open_deep_research/visual_vs_text_browser.ipynb index 9bb4ee8dc..4a85a465a 100644 --- a/examples/open_deep_research/visual_vs_text_browser.ipynb +++ b/examples/open_deep_research/visual_vs_text_browser.ipynb @@ -102,7 +102,7 @@ "from smolagents import CodeAgent, LiteLLMModel\n", "\n", "\n", - "proprietary_model = LiteLLMModel(\"gpt-4o\")" + "proprietary_model = LiteLLMModel(model_id=\"gpt-4o\")" ] }, { @@ -178,7 +178,7 @@ ")\n", "\n", "\n", - "proprietary_model = LiteLLMModel(\"gpt-4o\")\n", + "proprietary_model = LiteLLMModel(model_id=\"gpt-4o\")\n", "vision_browser_agent = initialize_agent(proprietary_model)\n", "### BUILD AGENTS & TOOLS\n", "\n", diff --git a/examples/rag.py b/examples/rag.py index f5a2e2cb1..3ff572fb3 100644 --- a/examples/rag.py +++ b/examples/rag.py @@ -28,11 +28,11 @@ class RetrieverTool(Tool): name = "retriever" - description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query." + description = "Uses lexical search to retrieve the parts of transformers documentation that could be most relevant to answer your query." inputs = { "query": { "type": "string", - "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.", + "description": "The query to perform. 
This should be lexically close to your target documents. Use the affirmative form rather than a question.", } } output_type = "string" @@ -52,13 +52,13 @@ def forward(self, query: str) -> str: ) -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel retriever_tool = RetrieverTool(docs_processed) agent = CodeAgent( tools=[retriever_tool], - model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), + model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2, ) diff --git a/examples/rag_using_chromadb.py b/examples/rag_using_chromadb.py index 864bfc848..fa2764355 100644 --- a/examples/rag_using_chromadb.py +++ b/examples/rag_using_chromadb.py @@ -97,8 +97,8 @@ def forward(self, query: str) -> str: # Choose which LLM engine to use! -# from smolagents import HfApiModel -# model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct") +# from smolagents import InferenceClientModel +# model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct") # from smolagents import TransformersModel # model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct") diff --git a/examples/sandboxed_execution.py b/examples/sandboxed_execution.py new file mode 100644 index 000000000..25e4fb771 --- /dev/null +++ b/examples/sandboxed_execution.py @@ -0,0 +1,12 @@ +from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel + + +model = InferenceClientModel() + +agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="docker") +output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") +print("Docker executor result:", output) + +agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="e2b") +output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") +print("E2B executor result:", output) diff --git 
a/examples/smolagents_benchmark/run.py b/examples/smolagents_benchmark/run.py new file mode 100644 index 000000000..f2b60eb58 --- /dev/null +++ b/examples/smolagents_benchmark/run.py @@ -0,0 +1,254 @@ +import argparse +import datetime +import json +import os +import threading +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import datasets +import pandas as pd +from dotenv import load_dotenv +from tqdm import tqdm + +from smolagents import ( + AgentError, + CodeAgent, + GoogleSearchTool, + InferenceClientModel, + LiteLLMModel, + PythonInterpreterTool, + ToolCallingAgent, + VisitWebpageTool, +) +from smolagents.agents import ActionStep + + +load_dotenv() +os.makedirs("output", exist_ok=True) + +APPEND_ANSWER_LOCK = threading.Lock() + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Runs an agent powered by the given model on smolagent benchmark.") + parser.add_argument( + "--date", + type=str, + default=None, + help="The date for the evaluation.", + ) + parser.add_argument( + "--eval-dataset", + type=str, + default="smolagents/benchmark-v1", + ) + # The eval dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1 + parser.add_argument( + "--model-type", + type=str, + default="InferenceClientModel", + choices=["LiteLLMModel", "InferenceClientModel"], + help="The model type to use (LiteLLMModel or InferenceClientModel)", + ) + parser.add_argument( + "--model-id", + type=str, + required=True, + help="The model ID to use for the specified model type", + ) + parser.add_argument( + "--provider", + type=str, + default="hf-inference", + help="The provider for InferenceClientModel - will not be used for LiteLLMModel", + ) + parser.add_argument( + "--agent-action-type", + type=str, + default="code", + choices=["code", "tool-calling", "vanilla"], + help="The agent action type: 'code', 'tool-calling', or 'vanilla' to 
use the vanilla llm", + ) + parser.add_argument( + "--parallel-workers", + type=int, + default=8, + help="The number of processes to run in parallel", + ) + parser.add_argument( + "--push-answers-to-hub", + action="store_true", + default=False, + help="Push the answers to the hub", + ) + parser.add_argument( + "--answers-dataset", + type=str, + default="smolagents/answers", + ) + return parser.parse_args() + + +def load_eval_dataset(eval_dataset): + # Choose the tasks to evaluate on: + # tasks = ["gaia"] + # or evaluate on all tasks: ["gaia", "math", "simpleqa"] + tasks = datasets.get_dataset_config_names(eval_dataset) + print(tasks) + + eval_ds = {task: datasets.load_dataset(eval_dataset, task, split="test") for task in tasks} + print(pd.DataFrame(eval_ds["simpleqa"]).head()) + return eval_ds + + +def serialize_agent_error(obj): + if isinstance(obj, AgentError): + return {"error_type": obj.__class__.__name__, "message": obj.message} + else: + return str(obj) + + +def append_answer(entry: dict, jsonl_file: str) -> None: + jsonl_file = Path(jsonl_file) + jsonl_file.parent.mkdir(parents=True, exist_ok=True) + with APPEND_ANSWER_LOCK, open(jsonl_file, "a", encoding="utf-8") as fp: + fp.write(json.dumps(entry) + "\n") + assert os.path.exists(jsonl_file), "File not found!" 
+ + +def answer_single_question(example, model, answers_file, action_type): + if action_type == "vanilla": + agent = model + elif action_type == "code": + agent = CodeAgent( + tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool()], + model=model, + additional_authorized_imports=["numpy", "sympy"], + max_steps=10, + ) + elif action_type == "tool-calling": + agent = ToolCallingAgent( + tools=[GoogleSearchTool(provider="serper"), VisitWebpageTool(), PythonInterpreterTool()], + model=model, + additional_authorized_imports=["numpy", "sympy"], + max_steps=10, + ) + + augmented_question = example["question"] + if example["source"] == "SimpleQA": + augmented_question += " Answer with only the final number." + if example["source"] == "MATH": + augmented_question += " Write code, not latex." + + start_time = time.time() + + try: + if action_type == "vanilla": + answer = agent([{"role": "user", "content": augmented_question}]).content + token_counts = agent.monitor.get_total_token_counts() + intermediate_steps = answer + else: + # Run agent ๐Ÿš€ + answer = str(agent.run(augmented_question)) + token_counts = agent.monitor.get_total_token_counts() + # Remove memory from logs to make them more compact. 
+ for step in agent.memory.steps: + if isinstance(step, ActionStep): + step.agent_memory = None + intermediate_steps = str(agent.memory.steps) + + end_time = time.time() + except Exception as e: + print("Error on ", augmented_question, e) + intermediate_steps = [] + token_counts = {"input": 0, "output": 0} + answer = str(e) + end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + annotated_example = { + "model_id": model.model_id, + "agent_action_type": action_type, + "question": augmented_question, + "original_question": example["question"], + "answer": answer, + "true_answer": example["true_answer"], + "source": example["source"], + "intermediate_steps": intermediate_steps, + "start_time": start_time, + "end_time": end_time, + "token_counts": token_counts, + } + append_answer(annotated_example, answers_file) + + +def answer_questions( + eval_ds, + model, + date, + action_type: str = "code", + output_dir: str = "output", + answers_dataset: str = None, + push_answers_to_hub: bool = False, + parallel_workers: int = 32, +): + date = date or datetime.date.today().isoformat() + model_id = model.model_id + + for task in eval_ds: + file_name = f"{output_dir}/{model_id.replace('/', '__')}__{action_type}__{task}__{date}.jsonl" + print(f"Starting processing and writing output to '{file_name}'") + answered_questions = [] + if os.path.exists(file_name): + with open(file_name, "r") as f: + for line in f: + answered_questions.append(json.loads(line)["original_question"]) + + examples_todo = [example for example in eval_ds[task] if example["question"] not in answered_questions] + print(f"Launching {parallel_workers} parallel workers.") + + with ThreadPoolExecutor(max_workers=parallel_workers) as exe: + futures = [ + exe.submit(answer_single_question, example, model, file_name, action_type) for example in examples_todo + ] + for f in tqdm(as_completed(futures), total=len(examples_todo), desc="Processing tasks"): + f.result() + + print("All tasks processed.") + + if 
push_answers_to_hub and answers_dataset: + print("Pushing answers to hub...") + ds = datasets.Dataset.from_pandas(pd.read_json(file_name, lines=True), split="test", preserve_index=False) + config = f"{model_id.replace('/', '__')}__{action_type}__{task}" + data_dir = f"{model_id}/{action_type}/{task}/{date}" + ds.push_to_hub( + answers_dataset, + config_name=config, + data_dir=data_dir, + split="test", + commit_message=f"Upload {config}", + ) + + +if __name__ == "__main__": + args = parse_arguments() + + eval_ds = load_eval_dataset(args.eval_dataset) + + if args.model_type == "LiteLLMModel": + model = LiteLLMModel( + model_id=args.model_id, + max_completion_tokens=8192, + ) + else: + model = InferenceClientModel(model_id=args.model_id, provider=args.provider, max_tokens=8192) + + answer_questions( + eval_ds, + model, + args.date, + action_type=args.agent_action_type, + answers_dataset=args.answers_dataset, + push_answers_to_hub=args.push_answers_to_hub, + parallel_workers=args.parallel_workers, + ) diff --git a/examples/smolagents_benchmark/score.ipynb b/examples/smolagents_benchmark/score.ipynb new file mode 100644 index 000000000..b624d802c --- /dev/null +++ b/examples/smolagents_benchmark/score.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -e .. 
datasets sympy numpy matplotlib seaborn -q # Install dev version of smolagents + some packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Benchmark date\n", + "# - set a concrete date:\n", + "DATE = \"2024-12-26\"\n", + "# - or use default: today\n", + "# DATE = None\n", + "\n", + "# Evaluation dataset\n", + "# - the dataset is gated, so you must first visit its page to request access: https://huggingface.co/datasets/smolagents-benchmark/benchmark-v1\n", + "EVAL_DATASET = \"smolagents-benchmark/benchmark-v1\"\n", + "\n", + "# Answers dataset: it must be a gated dataset; required to score the answers\n", + "ANSWERS_DATASET = \"smolagents-benchmark/answers\"\n", + "# Whether to push the answers dataset to the Hub\n", + "PUSH_ANSWERS_DATASET_TO_HUB = True\n", + "\n", + "# Results dataset\n", + "RESULTS_DATASET = \"smolagents-benchmark/results\"\n", + "# Whether to push the results dataset to the Hub\n", + "PUSH_RESULTS_DATASET_TO_HUB = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constants and utilities/tools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import string\n", + "import warnings\n", + "from concurrent.futures import ThreadPoolExecutor, as_completed\n", + "from datetime import datetime\n", + "\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "\n", + "\n", + "def normalize_number_str(number_str: str) -> float:\n", + " # we replace these common units and commas to allow\n", + " # conversion to float\n", + " for char in [\"$\", \"%\", \",\"]:\n", + " number_str = number_str.replace(char, \"\")\n", + " try:\n", + " return float(number_str)\n", + " except ValueError:\n", + " return float(\"inf\")\n", + "\n", + "\n", + "def split_string(\n", + " s: str,\n", + " char_list: list[str] = [\",\", \";\"],\n", + ") -> list[str]:\n", + " pattern = 
f\"[{''.join(char_list)}]\"\n", + " return re.split(pattern, s)\n", + "\n", + "\n", + "def is_float(element: any) -> bool:\n", + " try:\n", + " float(element)\n", + " return True\n", + " except ValueError:\n", + " return False\n", + "\n", + "\n", + "def normalize_str(input_str, remove_punct=True) -> str:\n", + " \"\"\"\n", + " Normalize a string by:\n", + " - Removing all white spaces\n", + " - Optionally removing punctuation (if remove_punct is True)\n", + " - Converting to lowercase\n", + " Parameters:\n", + " - input_str: str, the string to normalize\n", + " - remove_punct: bool, whether to remove punctuation (default: True)\n", + " Returns:\n", + " - str, the normalized string\n", + " \"\"\"\n", + " # Remove all white spaces. Required e.g for seagull vs. sea gull\n", + " no_spaces = re.sub(r\"\\s\", \"\", input_str)\n", + "\n", + " # Remove punctuation, if specified.\n", + " if remove_punct:\n", + " translator = str.maketrans(\"\", \"\", string.punctuation)\n", + " return no_spaces.lower().translate(translator)\n", + " else:\n", + " return no_spaces.lower()\n", + "\n", + "\n", + "def extract_numbers(text: str) -> list[str]:\n", + " \"\"\"This pattern matches:\n", + " - Optional negative sign\n", + " - Numbers with optional comma thousand separators\n", + " - Optional decimal points with decimal numbers\n", + " \"\"\"\n", + " pattern = r\"-?(?:\\d{1,3}(?:,\\d{3})+|\\d+)(?:\\.\\d+)?\"\n", + "\n", + " return [el.replace(\",\", \"\") for el in re.findall(pattern, text)]\n", + "\n", + "\n", + "def get_question_score_gaia(\n", + " model_answer: str,\n", + " ground_truth: str,\n", + ") -> bool:\n", + " \"\"\"Scoring function used to score functions from the GAIA benchmark\"\"\"\n", + " if is_float(ground_truth):\n", + " normalized_answer = normalize_number_str(str(model_answer))\n", + " return normalized_answer == float(ground_truth)\n", + "\n", + " elif any(char in ground_truth for char in [\",\", \";\"]): # if gt is a list\n", + " # question with the fish: 
normalization removes punct\n", + " gt_elems = split_string(ground_truth)\n", + " ma_elems = split_string(model_answer)\n", + "\n", + " if len(gt_elems) != len(ma_elems): # check length is the same\n", + " warnings.warn(\"Answer lists have different lengths, returning False.\", UserWarning)\n", + " return False\n", + "\n", + " comparisons = []\n", + " for ma_elem, gt_elem in zip(ma_elems, gt_elems): # compare each element as float or str\n", + " if is_float(gt_elem):\n", + " normalized_ma_elem = normalize_number_str(ma_elem)\n", + " comparisons.append(normalized_ma_elem == float(gt_elem))\n", + " else:\n", + " # we do not remove punct since comparisons can include punct\n", + " comparisons.append(\n", + " normalize_str(ma_elem, remove_punct=False) == normalize_str(gt_elem, remove_punct=False)\n", + " )\n", + " return all(comparisons)\n", + "\n", + " else: # if gt is a str\n", + " return normalize_str(model_answer) == normalize_str(ground_truth)\n", + "\n", + "\n", + "def get_correct(row):\n", + " if row[\"source\"] == \"MATH\": # Checks the last number in answer\n", + " numbers_answer = extract_numbers(str(row[\"answer\"]))\n", + " if len(numbers_answer) == 0:\n", + " return False\n", + " return np.isclose(float(numbers_answer[-1]), float(row[\"true_answer\"]), rtol=1e-5, atol=1e-7)\n", + " else:\n", + " return get_question_score_gaia(str(row[\"answer\"]), str(row[\"true_answer\"]))\n", + "\n", + "\n", + "def score_answers_subset(answers_dataset, answers_subset):\n", + " try:\n", + " print(answers_dataset, answers_subset)\n", + " *model_id, action_type, task = answers_subset.split(\"__\")\n", + " model_id = \"/\".join(model_id)\n", + " ds = datasets.load_dataset(answers_dataset, answers_subset, split=\"test\")\n", + " df = ds.to_pandas()\n", + " df[\"correct\"] = df.apply(get_correct, axis=1)\n", + " assert df[\"correct\"].notnull().sum() > 30, \"Missing answers\"\n", + " acc = df[\"correct\"].mean().item()\n", + " result = df.loc[0, [\"model_id\", 
\"agent_action_type\", \"source\"]].to_dict()\n", + " result[\"acc\"] = acc\n", + " return result\n", + " except Exception as e:\n", + " print(f\"Error with {answers_subset}: {e}\")\n", + " return None\n", + "\n", + "\n", + "def score_answers(\n", + " answers_subsets,\n", + " answers_dataset=ANSWERS_DATASET,\n", + " date=DATE,\n", + " push_to_hub_dataset=RESULTS_DATASET if PUSH_RESULTS_DATASET_TO_HUB else None,\n", + " set_default=True,\n", + "):\n", + " if not answers_dataset:\n", + " raise ValueError(\"Pass 'answers_dataset' to load the answers from it\")\n", + " date = date or datetime.date.today().isoformat()\n", + " results = []\n", + " with ThreadPoolExecutor(max_workers=16) as exe:\n", + " futures = [\n", + " exe.submit(score_answers_subset, answers_dataset, answers_subset) for answers_subset in answers_subsets\n", + " ]\n", + " for f in tqdm(as_completed(futures), total=len(answers_subsets), desc=\"Processing tasks\"):\n", + " result = f.result()\n", + " if result:\n", + " results.append(result)\n", + " df = pd.DataFrame(results)\n", + "\n", + " if push_to_hub_dataset:\n", + " ds = datasets.Dataset.from_pandas(df)\n", + " config = date\n", + " set_default = set_default\n", + " ds.push_to_hub(\n", + " push_to_hub_dataset, config_name=config, set_default=set_default, commit_message=f\"Upload {config} results\"\n", + " )\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Score answers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import datasets\n", + "import pandas as pd\n", + "\n", + "\n", + "# Choose the answers subsets to score:\n", + "# answers_subsets = [\"meta-llama__Llama-3.1-8B-Instruct__code__gaia\"]\n", + "# or get all the answers subsets present in the ANSWERS_DATASET\n", + "answers_subsets = datasets.get_dataset_config_names(ANSWERS_DATASET)\n", + "print(\"Number of answers_subsets\", len(answers_subsets))\n", + "print(\"Example of 
answers_subset\", answers_subsets[0])\n", + "\n", + "result_df = score_answers(answers_subsets)\n", + "result_df[\"acc\"] = (result_df[\"acc\"] * 100).round(2)\n", + "result_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pivot_df = result_df.pivot_table(\n", + " index=[\"model_id\", \"source\"],\n", + " columns=[\"agent_action_type\"],\n", + " values=\"acc\",\n", + " fill_value=float(\"nan\"),\n", + ").reset_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(pivot_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from matplotlib.legend_handler import HandlerTuple # Added import\n", + "\n", + "\n", + "# Assuming pivot_df is your original dataframe\n", + "models = pivot_df[\"model_id\"].unique()\n", + "sources = pivot_df[\"source\"].unique()\n", + "\n", + "# Create figure and axis\n", + "plt.style.use(\"seaborn-v0_8-white\")\n", + "fig, ax = plt.subplots(figsize=(15, 6))\n", + "\n", + "# Set the width of each bar group and positions of the bars\n", + "width = 0.15 # width of each bar\n", + "spacing = 0.02 # space between bars within a group\n", + "group_spacing = 0.2 # space between model groups\n", + "\n", + "# Calculate positions for the bars\n", + "num_sources = len(sources)\n", + "total_width_per_group = (width + spacing) * num_sources * 2 # *2 for agent and vanilla\n", + "x = np.arange(len(models)) * (total_width_per_group + group_spacing)\n", + "\n", + "# Plot bars for each source\n", + "for i, source in enumerate(sources):\n", + " source_data = pivot_df[pivot_df[\"source\"] == source]\n", + " agent_scores = [\n", + " source_data[source_data[\"model_id\"] == model][\"code\"].values[0]\n", + " if 
len(source_data[source_data[\"model_id\"] == model]) > 0\n", + " else np.nan\n", + " for model in models\n", + " ]\n", + " vanilla_scores = [\n", + " source_data[source_data[\"model_id\"] == model][\"vanilla\"].values[0]\n", + " if len(source_data[source_data[\"model_id\"] == model]) > 0\n", + " else np.nan\n", + " for model in models\n", + " ]\n", + "\n", + " # Position calculation for each pair of bars\n", + " pos = x + i * (width * 2 + spacing)\n", + "\n", + " agent_bars = ax.bar(pos, agent_scores, width, label=f\"{source} (Agent)\", alpha=0.8)\n", + " vanilla_bars = ax.bar(\n", + " pos + width * 0.6,\n", + " vanilla_scores,\n", + " width,\n", + " hatch=\"////\",\n", + " alpha=0.5,\n", + " hatch_linewidth=2,\n", + " label=f\"{source} (Vanilla)\",\n", + " color=\"white\",\n", + " edgecolor=agent_bars[0].get_facecolor(),\n", + " )\n", + "\n", + "# Customize the plot\n", + "ax.set_ylabel(\"Score\")\n", + "ax.set_title(\"Model Performance Comparison\")\n", + "\n", + "# Set x-axis ticks in the middle of each group\n", + "group_centers = x + (total_width_per_group - spacing) / 2\n", + "ax.set_xticks(group_centers)\n", + "\n", + "# Wrap long model names to prevent overlap\n", + "wrapped_labels = [\"\\n\".join(model.split(\"/\")) for model in models]\n", + "ax.set_xticklabels(wrapped_labels, rotation=0, ha=\"center\")\n", + "\n", + "# Modify legend to combine agent and vanilla entries\n", + "handles, labels = ax.get_legend_handles_labels()\n", + "unique_sources = sources\n", + "legend_elements = [\n", + " (handles[i * 2], handles[i * 2 + 1], labels[i * 2].replace(\" (Agent)\", \"\")) for i in range(len(unique_sources))\n", + "]\n", + "custom_legend = ax.legend(\n", + " [(agent_handle, vanilla_handle) for agent_handle, vanilla_handle, _ in legend_elements],\n", + " [label for _, _, label in legend_elements],\n", + " handler_map={tuple: HandlerTuple(ndivide=None)},\n", + " bbox_to_anchor=(1.05, 1),\n", + " loc=\"upper left\",\n", + ")\n", + "\n", + "ax.yaxis.grid(True, 
linestyle=\"--\", alpha=0.3)\n", + "ax.set_ylim(bottom=0)\n", + "plt.tight_layout()\n", + "ax.spines[\"top\"].set_visible(False)\n", + "ax.spines[\"right\"].set_visible(False)\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "test", + "language": "python", + "name": "test" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/text_to_sql.py b/examples/text_to_sql.py index c25f0caa0..1b5bd3d6d 100644 --- a/examples/text_to_sql.py +++ b/examples/text_to_sql.py @@ -69,11 +69,11 @@ def sql_engine(query: str) -> str: return output -from smolagents import CodeAgent, HfApiModel +from smolagents import CodeAgent, InferenceClientModel agent = CodeAgent( tools=[sql_engine], - model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"), + model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct"), ) agent.run("Can you give me the name of the client who got the most expensive receipt?") diff --git a/pyproject.toml b/pyproject.toml index ab323f8a1..0db6ab2b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta" [project] name = "smolagents" -version = "1.10.0.dev0" +version = "1.15.0.dev0" description = "๐Ÿค— smolagents: a barebones library for agents. Agents write python code to call tools or orchestrate other agents." 
authors = [ - { name="Aymeric Roucher", email="aymeric@hf.co" }, { name="Thomas Wolf"}, + { name="Aymeric Roucher", email="aymeric@hf.co" }, ] readme = "README.md" requires-python = ">=3.10" @@ -15,7 +15,6 @@ dependencies = [ "huggingface-hub>=0.28.0", "requests>=2.32.3", "rich>=13.9.4", - "pandas>=2.2.3", "jinja2>=3.1.4", "pillow>=11.0.0", "markdownify>=0.14.1", @@ -24,14 +23,22 @@ dependencies = [ ] [project.optional-dependencies] +bedrock = [ + "boto3>=1.36.18" +] torch = [ "torch", "torchvision", + "numpy>=1.21.2", ] audio = [ "soundfile", "smolagents[torch]", ] +docker = [ + "docker>=7.1.0", + "websocket-client", +] e2b = [ "e2b-code-interpreter>=1.0.3", "python-dotenv>=1.0.1", @@ -43,7 +50,7 @@ litellm = [ "litellm>=1.60.2", ] mcp = [ - "mcpadapt>=0.0.6", + "mcpadapt>=0.0.19", # Security fix "mcp", ] mlx-lm = [ @@ -60,21 +67,32 @@ telemetry = [ ] transformers = [ "accelerate", - "transformers>=4.0.0,<4.49.0", + "transformers>=4.0.0", "smolagents[torch]", ] +vision = [ + "helium", + "selenium", +] +vllm = [ + "vllm", + "torch" +] all = [ - "smolagents[audio,e2b,gradio,litellm,mcp,openai,telemetry,transformers]", + "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,transformers,vision,bedrock]", ] quality = [ "ruff>=0.9.0", ] test = [ "ipython>=8.31.0", # for interactive environment tests + "pandas>=2.2.3", "pytest>=8.1.0", + "pytest-datadir", "python-dotenv>=1.0.1", # For test_all_docs "smolagents[all]", "rank-bm25", # For test_all_docs + "Wikipedia-API>=0.8.1", ] dev = [ "smolagents[quality,test]", @@ -107,4 +125,4 @@ lines-after-imports = 2 [project.scripts] smolagent = "smolagents.cli:main" -webagent = "smolagents.vision_web_browser:main" \ No newline at end of file +webagent = "smolagents.vision_web_browser:main" diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py index a1321eb1b..be4c3c19e 100644 --- a/src/smolagents/__init__.py +++ b/src/smolagents/__init__.py @@ -14,17 +14,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.10.0.dev0" +__version__ = "1.15.0.dev0" from .agent_types import * # noqa: I001 from .agents import * # Above noqa avoids a circular dependency due to cli.py from .default_tools import * -from .e2b_executor import * from .gradio_ui import * from .local_python_executor import * +from .mcp_client import * from .memory import * from .models import * from .monitoring import * +from .remote_executors import * from .tools import * from .utils import * from .cli import * diff --git a/src/smolagents/_function_type_hints_utils.py b/src/smolagents/_function_type_hints_utils.py index dddd90d0c..e5a367c08 100644 --- a/src/smolagents/_function_type_hints_utils.py +++ b/src/smolagents/_function_type_hints_utils.py @@ -26,26 +26,19 @@ import json import re import types +from collections.abc import Callable from copy import copy from typing import ( Any, - Callable, - Dict, - List, - Optional, - Tuple, + Literal, Union, get_args, get_origin, get_type_hints, ) -from huggingface_hub.utils import is_torch_available -from .utils import _is_pillow_available - - -def get_imports(code: str) -> List[str]: +def get_imports(code: str) -> list[str]: """ Extracts all the libraries (not relative imports) that are imported in a code. @@ -83,7 +76,7 @@ class DocstringParsingException(Exception): """Exception raised for errors in parsing docstrings to generate JSON schemas""" -def get_json_schema(func: Callable) -> Dict: +def get_json_schema(func: Callable) -> dict: """ This function generates a JSON schema for a given function, based on its docstring and type hints. This is mostly used for passing lists of tools to a chat template. 
The JSON schema contains the name and description of @@ -221,26 +214,30 @@ def get_json_schema(func: Callable) -> Dict: # Extracts the initial segment of the docstring, containing the function description -description_re = re.compile(r"^(.*?)[\n\s]*(Args:|Returns:|Raises:|\Z)", re.DOTALL) +description_re = re.compile(r"^(.*?)(?=\n\s*(Args:|Returns:|Raises:)|\Z)", re.DOTALL) # Extracts the Args: block from the docstring args_re = re.compile(r"\n\s*Args:\n\s*(.*?)[\n\s]*(Returns:|Raises:|\Z)", re.DOTALL) # Splits the Args: block into individual arguments args_split_re = re.compile( - r""" -(?:^|\n) # Match the start of the args block, or a newline -\s*(\w+)\s*(?:\([^)]*\))?:\s* # Capture the argument name (ignore the type) and strip spacing -(.*?)\s* # Capture the argument description, which can span multiple lines, and strip trailing spacing -(?=\n\s*\w+:|\Z) # Stop when you hit the next argument or the end of the block -""", + r"(?:^|\n)" # Match the start of the args block, or a newline + r"\s*(\w+)\s*(?:\([^)]*?\))?:\s*" # Capture the argument name (ignore the type) and strip spacing + r"(.*?)\s*" # Capture the argument description, which can span multiple lines, and strip trailing spacing + r"(?=\n\s*\w+\s*(?:\([^)]*?\))?:|\Z)", # Stop when you hit the next argument (with or without type) or the end of the block re.DOTALL | re.VERBOSE, ) # Extracts the Returns: block from the docstring, if present. Note that most chat templates ignore the return type/doc! -returns_re = re.compile(r"\n\s*Returns:\n\s*(.*?)[\n\s]*(Raises:|\Z)", re.DOTALL) +returns_re = re.compile( + r"\n\s*Returns:\n\s*" + r"(?:[^)]*?:\s*)?" 
# Ignore the return type if present + r"(.*?)" # Capture the return description + r"[\n\s]*(Raises:|\Z)", + re.DOTALL, +) def _parse_google_format_docstring( docstring: str, -) -> Tuple[Optional[str], Optional[Dict], Optional[str]]: +) -> tuple[str | None, dict | None, str | None]: """ Parses a Google-style docstring to extract the function description, argument descriptions, and return description. @@ -273,7 +270,7 @@ def _parse_google_format_docstring( return description, args_dict, returns -def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hints: bool = True) -> Dict: +def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hints: bool = True) -> dict: type_hints = get_type_hints(func) signature = inspect.signature(func) @@ -300,7 +297,7 @@ def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hin return schema -def _parse_type_hint(hint: str) -> Dict: +def _parse_type_hint(hint: str) -> dict: origin = get_origin(hint) args = get_args(hint) @@ -314,20 +311,7 @@ def _parse_type_hint(hint: str) -> Dict: ) elif origin is Union or (hasattr(types, "UnionType") and origin is types.UnionType): - # Recurse into each of the subtypes in the Union, except None, which is handled separately at the end - subtypes = [_parse_type_hint(t) for t in args if t is not type(None)] - if len(subtypes) == 1: - # A single non-null type can be expressed directly - return_dict = subtypes[0] - elif all(isinstance(subtype["type"], str) for subtype in subtypes): - # A union of basic types can be expressed as a list in the schema - return_dict = {"type": sorted([subtype["type"] for subtype in subtypes])} - else: - # A union of more complex types requires "anyOf" - return_dict = {"anyOf": subtypes} - if type(None) in args: - return_dict["nullable"] = True - return return_dict + return _parse_union_type(args) elif origin is list: if not args: @@ -363,9 +347,33 @@ def _parse_type_hint(hint: str) -> Dict: 
out["additionalProperties"] = _parse_type_hint(args[1]) return out + elif origin is Literal: + literal_types = set(type(arg) for arg in args) + final_type = _parse_union_type(literal_types) + + # None literal value is represented by 'nullable' field set by _parse_union_type + final_type.update({"enum": [arg for arg in args if arg is not None]}) + return final_type + raise TypeHintParsingException("Couldn't parse this type hint, likely due to a custom class or object: ", hint) +def _parse_union_type(args: tuple[Any, ...]) -> dict: + subtypes = [_parse_type_hint(t) for t in args if t is not type(None)] + if len(subtypes) == 1: + # A single non-null type can be expressed directly + return_dict = subtypes[0] + elif all(isinstance(subtype["type"], str) for subtype in subtypes): + # A union of basic types can be expressed as a list in the schema + return_dict = {"type": sorted([subtype["type"] for subtype in subtypes])} + else: + # A union of more complex types requires "anyOf" + return_dict = {"anyOf": subtypes} + if type(None) in args: + return_dict["nullable"] = True + return return_dict + + _BASE_TYPE_MAPPING = { int: {"type": "integer"}, float: {"type": "number"}, @@ -376,17 +384,20 @@ def _parse_type_hint(hint: str) -> Dict: } -def _get_json_schema_type(param_type: str) -> Dict[str, str]: +def _get_json_schema_type(param_type: str) -> dict[str, str]: if param_type in _BASE_TYPE_MAPPING: return copy(_BASE_TYPE_MAPPING[param_type]) - if str(param_type) == "Image" and _is_pillow_available(): + if str(param_type) == "Image": from PIL.Image import Image if param_type == Image: return {"type": "image"} - if str(param_type) == "Tensor" and is_torch_available(): - from torch import Tensor + if str(param_type) == "Tensor": + try: + from torch import Tensor - if param_type == Tensor: - return {"type": "audio"} + if param_type == Tensor: + return {"type": "audio"} + except ModuleNotFoundError: + pass return {"type": "object"} diff --git a/src/smolagents/agent_types.py 
b/src/smolagents/agent_types.py index b0d4ee1d1..73772292e 100644 --- a/src/smolagents/agent_types.py +++ b/src/smolagents/agent_types.py @@ -19,11 +19,8 @@ import uuid from io import BytesIO -import numpy as np +import PIL.Image import requests -from huggingface_hub.utils import is_torch_available -from PIL import Image -from PIL.Image import Image as ImageType from .utils import _is_package_available @@ -37,7 +34,7 @@ class AgentType: These objects serve three purposes: - - They behave as they were the type they're meant to be, e.g., a string for text, a PIL.Image for images + - They behave as they were the type they're meant to be, e.g., a string for text, a PIL.Image.Image for images - They can be stringified: str(object) in order to return a string defining the object - They should be displayed correctly in ipython notebooks/colab/jupyter """ @@ -73,14 +70,14 @@ def to_string(self): return str(self._value) -class AgentImage(AgentType, ImageType): +class AgentImage(AgentType, PIL.Image.Image): """ - Image type returned by the agent. Behaves as a PIL.Image. + Image type returned by the agent. Behaves as a PIL.Image.Image. 
""" def __init__(self, value): AgentType.__init__(self, value) - ImageType.__init__(self) + PIL.Image.Image.__init__(self) self._path = None self._raw = None @@ -88,19 +85,24 @@ def __init__(self, value): if isinstance(value, AgentImage): self._raw, self._path, self._tensor = value._raw, value._path, value._tensor - elif isinstance(value, ImageType): + elif isinstance(value, PIL.Image.Image): self._raw = value elif isinstance(value, bytes): - self._raw = Image.open(BytesIO(value)) + self._raw = PIL.Image.open(BytesIO(value)) elif isinstance(value, (str, pathlib.Path)): self._path = value - elif is_torch_available(): - import torch + else: + try: + import torch + + if isinstance(value, torch.Tensor): + self._tensor = value + import numpy as np - if isinstance(value, torch.Tensor): - self._tensor = value - if isinstance(value, np.ndarray): - self._tensor = torch.from_numpy(value) + if isinstance(value, np.ndarray): + self._tensor = torch.from_numpy(value) + except ModuleNotFoundError: + pass if self._path is None and self._raw is None and self._tensor is None: raise TypeError(f"Unsupported type for {self.__class__.__name__}: {type(value)}") @@ -115,18 +117,20 @@ def _ipython_display_(self, include=None, exclude=None): def to_raw(self): """ - Returns the "raw" version of that object. In the case of an AgentImage, it is a PIL.Image. + Returns the "raw" version of that object. In the case of an AgentImage, it is a PIL.Image.Image. 
""" if self._raw is not None: return self._raw if self._path is not None: - self._raw = Image.open(self._path) + self._raw = PIL.Image.open(self._path) return self._raw if self._tensor is not None: + import numpy as np + array = self._tensor.cpu().detach().numpy() - return Image.fromarray((255 - array * 255).astype(np.uint8)) + return PIL.Image.fromarray((255 - array * 255).astype(np.uint8)) def to_string(self): """ @@ -143,10 +147,12 @@ def to_string(self): return self._path if self._tensor is not None: + import numpy as np + array = self._tensor.cpu().detach().numpy() # There is likely simpler than load into image into save - img = Image.fromarray((255 - array * 255).astype(np.uint8)) + img = PIL.Image.fromarray((255 - array * 255).astype(np.uint8)) directory = tempfile.mkdtemp() self._path = os.path.join(directory, str(uuid.uuid4()) + ".png") @@ -172,10 +178,11 @@ class AgentAudio(AgentType, str): """ def __init__(self, value, samplerate=16_000): - if not _is_package_available("soundfile") or not is_torch_available(): + if not _is_package_available("soundfile") or not _is_package_available("torch"): raise ModuleNotFoundError( "Please install 'audio' extra to use AgentAudio: `pip install 'smolagents[audio]'`" ) + import numpy as np import torch super().__init__(value) @@ -186,7 +193,7 @@ def __init__(self, value, samplerate=16_000): self.samplerate = samplerate if isinstance(value, (str, pathlib.Path)): self._path = value - elif is_torch_available() and isinstance(value, torch.Tensor): + elif isinstance(value, torch.Tensor): self._tensor = value elif isinstance(value, tuple): self.samplerate = value[0] @@ -261,13 +268,15 @@ def handle_agent_output_types(output, output_type=None): # If the class does not have defined output, then we map according to the type if isinstance(output, str): return AgentText(output) - if isinstance(output, ImageType): + if isinstance(output, PIL.Image.Image): return AgentImage(output) - if is_torch_available(): + try: import torch if 
isinstance(output, torch.Tensor): return AgentAudio(output) + except ModuleNotFoundError: + pass return output diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index a4d1b08f8..8ff8eb230 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -22,40 +22,49 @@ import tempfile import textwrap import time +from abc import ABC, abstractmethod from collections import deque +from collections.abc import Callable, Generator from logging import getLogger from pathlib import Path -from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, TypedDict, Union +from typing import TYPE_CHECKING, Any, TypedDict import jinja2 import yaml from huggingface_hub import create_repo, metadata_update, snapshot_download, upload_folder from jinja2 import StrictUndefined, Template from rich.console import Group +from rich.live import Live +from rich.markdown import Markdown from rich.panel import Panel from rich.rule import Rule from rich.text import Text -from .agent_types import AgentAudio, AgentImage, AgentType, handle_agent_output_types + +if TYPE_CHECKING: + import PIL.Image + +from .agent_types import AgentAudio, AgentImage, handle_agent_output_types from .default_tools import TOOL_MAPPING, FinalAnswerTool -from .e2b_executor import E2BExecutor -from .local_python_executor import ( - BASE_BUILTIN_MODULES, - LocalPythonInterpreter, - fix_final_answer_code, -) -from .memory import ActionStep, AgentMemory, PlanningStep, SystemPromptStep, TaskStep, ToolCall -from .models import ( - ChatMessage, - MessageRole, - Model, +from .local_python_executor import BASE_BUILTIN_MODULES, LocalPythonExecutor, PythonExecutor, fix_final_answer_code +from .memory import ( + ActionStep, + AgentMemory, + FinalAnswerStep, + Message, + PlanningStep, + SystemPromptStep, + TaskStep, + ToolCall, ) +from .models import ChatMessage, MessageRole, Model, parse_json_if_needed from .monitoring import ( YELLOW_HEX, AgentLogger, LogLevel, Monitor, ) +from 
.remote_executors import DockerExecutor, E2BExecutor from .tools import Tool from .utils import ( AgentError, @@ -63,9 +72,11 @@ AgentGenerationError, AgentMaxStepsError, AgentParsingError, + AgentToolCallError, + AgentToolExecutionError, + is_valid_name, make_init_file, parse_code_blobs, - parse_json_tool_call, truncate_content, ) @@ -73,12 +84,12 @@ logger = getLogger(__name__) -def get_variable_names(self, template: str) -> Set[str]: +def get_variable_names(self, template: str) -> set[str]: pattern = re.compile(r"\{\{([^{}]+)\}\}") return {match.group(1).strip() for match in pattern.finditer(template)} -def populate_template(template: str, variables: Dict[str, Any]) -> str: +def populate_template(template: str, variables: dict[str, Any]) -> str: compiled_template = Template(template, undefined=StrictUndefined) try: return compiled_template.render(**variables) @@ -91,18 +102,12 @@ class PlanningPromptTemplate(TypedDict): Prompt templates for the planning step. Args: - initial_facts (`str`): Initial facts prompt. - initial_plan (`str`): Initial plan prompt. - update_facts_pre_messages (`str`): Update facts pre-messages prompt. - update_facts_post_messages (`str`): Update facts post-messages prompt. + plan (`str`): Initial plan prompt. update_plan_pre_messages (`str`): Update plan pre-messages prompt. update_plan_post_messages (`str`): Update plan post-messages prompt. 
""" - initial_facts: str initial_plan: str - update_facts_pre_messages: str - update_facts_post_messages: str update_plan_pre_messages: str update_plan_post_messages: str @@ -153,10 +158,7 @@ class PromptTemplates(TypedDict): EMPTY_PROMPT_TEMPLATES = PromptTemplates( system_prompt="", planning=PlanningPromptTemplate( - initial_facts="", initial_plan="", - update_facts_pre_messages="", - update_facts_post_messages="", update_plan_pre_messages="", update_plan_post_messages="", ), @@ -165,7 +167,7 @@ class PromptTemplates(TypedDict): ) -class MultiStepAgent: +class MultiStepAgent(ABC): """ Agent class that solves the given task step by step, using the ReAct framework: While the objective is not reached, the agent will perform a cycle of action (given by the LLM) and observation (obtained from the environment). @@ -174,7 +176,7 @@ class MultiStepAgent: tools (`list[Tool]`): [`Tool`]s that the agent can use. model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions. prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates. - max_steps (`int`, default `6`): Maximum number of steps the agent can take to solve the task. + max_steps (`int`, default `20`): Maximum number of steps the agent can take to solve the task. tool_parser (`Callable`, *optional*): Function used to parse the tool calls from the LLM output. add_base_tools (`bool`, default `False`): Whether to add the base tools to the agent's tools. verbosity_level (`LogLevel`, default `LogLevel.INFO`): Level of verbosity of the agent's logs. 
@@ -190,32 +192,43 @@ class MultiStepAgent: def __init__( self, - tools: List[Tool], - model: Callable[[List[Dict[str, str]]], ChatMessage], - prompt_templates: Optional[PromptTemplates] = None, - max_steps: int = 6, - tool_parser: Optional[Callable] = None, + tools: list[Tool], + model: Model, + prompt_templates: PromptTemplates | None = None, + max_steps: int = 20, add_base_tools: bool = False, verbosity_level: LogLevel = LogLevel.INFO, - grammar: Optional[Dict[str, str]] = None, - managed_agents: Optional[List] = None, - step_callbacks: Optional[List[Callable]] = None, - planning_interval: Optional[int] = None, - name: Optional[str] = None, - description: Optional[str] = None, + grammar: dict[str, str] | None = None, + managed_agents: list | None = None, + step_callbacks: list[Callable] | None = None, + planning_interval: int | None = None, + name: str | None = None, + description: str | None = None, provide_run_summary: bool = False, - final_answer_checks: Optional[List[Callable]] = None, + final_answer_checks: list[Callable] | None = None, + logger: AgentLogger | None = None, ): self.agent_name = self.__class__.__name__ self.model = model self.prompt_templates = prompt_templates or EMPTY_PROMPT_TEMPLATES + if prompt_templates is not None: + missing_keys = set(EMPTY_PROMPT_TEMPLATES.keys()) - set(prompt_templates.keys()) + assert not missing_keys, ( + f"Some prompt templates are missing from your custom `prompt_templates`: {missing_keys}" + ) + for key, value in EMPTY_PROMPT_TEMPLATES.items(): + if isinstance(value, dict): + for subkey in value.keys(): + assert key in prompt_templates.keys() and (subkey in prompt_templates[key].keys()), ( + f"Some prompt templates are missing from your custom `prompt_templates`: {subkey} under {key}" + ) + self.max_steps = max_steps self.step_number = 0 - self.tool_parser = tool_parser or parse_json_tool_call self.grammar = grammar self.planning_interval = planning_interval - self.state = {} - self.name = name + self.state: 
dict[str, Any] = {} + self.name = self._validate_name(name) self.description = description self.provide_run_summary = provide_run_summary self.final_answer_checks = final_answer_checks @@ -225,15 +238,25 @@ def __init__( self._validate_tools_and_managed_agents(tools, managed_agents) self.system_prompt = self.initialize_system_prompt() - self.input_messages = None - self.task = None + self.task: str | None = None self.memory = AgentMemory(self.system_prompt) - self.logger = AgentLogger(level=verbosity_level) + + if logger is None: + self.logger = AgentLogger(level=verbosity_level) + else: + self.logger = logger + self.monitor = Monitor(self.model, self.logger) self.step_callbacks = step_callbacks if step_callbacks is not None else [] self.step_callbacks.append(self.monitor.update_metrics) - def _setup_managed_agents(self, managed_agents): + def _validate_name(self, name: str | None) -> str | None: + if name is not None and not is_valid_name(name): + raise ValueError(f"Agent name '{name}' must be a valid Python identifier and not a reserved keyword.") + return name + + def _setup_managed_agents(self, managed_agents: list | None = None) -> None: + """Setup managed agents with proper logging.""" self.managed_agents = {} if managed_agents: assert all(agent.name and agent.description for agent in managed_agents), ( @@ -252,16 +275,14 @@ def _setup_tools(self, tools, add_base_tools): if name != "python_interpreter" or self.__class__.__name__ == "ToolCallingAgent" } ) - self.tools["final_answer"] = FinalAnswerTool() + self.tools.setdefault("final_answer", FinalAnswerTool()) def _validate_tools_and_managed_agents(self, tools, managed_agents): tool_and_managed_agent_names = [tool.name for tool in tools] if managed_agents is not None: - for agent in managed_agents: - tool_and_managed_agent_names.append(agent.name) - for tool in agent.tools.values(): - if tool.name != "final_answer": - tool_and_managed_agent_names.append(tool.name) + tool_and_managed_agent_names += [agent.name 
for agent in managed_agents] + if self.name: + tool_and_managed_agent_names.append(self.name) if len(tool_and_managed_agent_names) != len(set(tool_and_managed_agent_names)): raise ValueError( "Each tool or managed_agent should have a unique name! You passed these duplicate names: " @@ -273,18 +294,22 @@ def run( task: str, stream: bool = False, reset: bool = True, - images: Optional[List[str]] = None, - additional_args: Optional[Dict] = None, + images: list["PIL.Image.Image"] | None = None, + additional_args: dict | None = None, + max_steps: int | None = None, ): """ Run the agent for the given task. Args: task (`str`): Task to perform. - stream (`bool`): Whether to run in a streaming way. + stream (`bool`): Whether to run in streaming mode. + If `True`, returns a generator that yields each step as it is executed. You must iterate over this generator to process the individual steps (e.g., using a for loop or `next()`). + If `False`, executes all steps internally and returns only the final answer after completion. reset (`bool`): Whether to reset the conversation or keep it going from previous run. - images (`list[str]`, *optional*): Paths to image(s). - additional_args (`dict`): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names! + images (`list[PIL.Image.Image]`, *optional*): Image(s) objects. + additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names! + max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. if not provided, will use the agent's default value. 
Example: ```py @@ -293,8 +318,9 @@ def run( agent.run("What is the result of 2 power 3.7384?") ``` """ - + max_steps = max_steps or self.max_steps self.task = task + self.interrupt_switch = False if additional_args is not None: self.state.update(additional_args) self.task += f""" @@ -313,41 +339,58 @@ def run( level=LogLevel.INFO, title=self.name if hasattr(self, "name") else None, ) - self.memory.steps.append(TaskStep(task=self.task, task_images=images)) + if getattr(self, "python_executor", None): + self.python_executor.send_variables(variables=self.state) + self.python_executor.send_tools({**self.tools, **self.managed_agents}) + if stream: # The steps are returned as they are executed through a generator to iterate on. - return self._run(task=self.task, images=images) - # Outputs are returned only at the end as a string. We only look at the last step - return deque(self._run(task=self.task, images=images), maxlen=1)[0] + return self._run(task=self.task, max_steps=max_steps, images=images) + # Outputs are returned only at the end. We only look at the last step. 
+ return deque(self._run(task=self.task, max_steps=max_steps, images=images), maxlen=1)[0].final_answer - def _run(self, task: str, images: List[str] | None = None) -> Generator[ActionStep | AgentType, None, None]: + def _run( + self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None + ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep]: final_answer = None self.step_number = 1 - while final_answer is None and self.step_number <= self.max_steps: + while final_answer is None and self.step_number <= max_steps: + if self.interrupt_switch: + raise AgentError("Agent interrupted.", self.logger) step_start_time = time.time() - memory_step = self._create_memory_step(step_start_time, images) + if self.planning_interval is not None and ( + self.step_number == 1 or (self.step_number - 1) % self.planning_interval == 0 + ): + planning_step = self._generate_planning_step( + task, is_first_step=(self.step_number == 1), step=self.step_number + ) + self.memory.steps.append(planning_step) + yield planning_step + action_step = ActionStep( + step_number=self.step_number, start_time=step_start_time, observations_images=images + ) try: - final_answer = self._execute_step(task, memory_step) + final_answer = self._execute_step(task, action_step) + except AgentGenerationError as e: + # Agent generation errors are not caused by a Model error but an implementation error: so we should raise them and exit. + raise e except AgentError as e: - memory_step.error = e + # Other AgentError types are caused by the Model, so we should log them and iterate. 
+ action_step.error = e finally: - self._finalize_step(memory_step, step_start_time) - yield memory_step + self._finalize_step(action_step, step_start_time) + self.memory.steps.append(action_step) + yield action_step self.step_number += 1 - if final_answer is None and self.step_number == self.max_steps + 1: + if final_answer is None and self.step_number == max_steps + 1: final_answer = self._handle_max_steps_reached(task, images, step_start_time) - yield memory_step - yield handle_agent_output_types(final_answer) - - def _create_memory_step(self, step_start_time: float, images: List[str] | None) -> ActionStep: - return ActionStep(step_number=self.step_number, start_time=step_start_time, observations_images=images) + yield action_step + yield FinalAnswerStep(handle_agent_output_types(final_answer)) - def _execute_step(self, task: str, memory_step: ActionStep) -> Union[None, Any]: - if self.planning_interval is not None and self.step_number % self.planning_interval == 1: - self.planning_step(task, is_first_step=(self.step_number == 1), step=self.step_number) + def _execute_step(self, task: str, memory_step: ActionStep) -> None | Any: self.logger.log_rule(f"Step {self.step_number}", level=LogLevel.INFO) final_answer = self.step(memory_step) if final_answer is not None and self.final_answer_checks: @@ -364,14 +407,13 @@ def _validate_final_answer(self, final_answer: Any): def _finalize_step(self, memory_step: ActionStep, step_start_time: float): memory_step.end_time = time.time() memory_step.duration = memory_step.end_time - step_start_time - self.memory.steps.append(memory_step) for callback in self.step_callbacks: # For compatibility with old callbacks that don't take the agent as an argument callback(memory_step) if len(inspect.signature(callback).parameters) == 1 else callback( memory_step, agent=self ) - def _handle_max_steps_reached(self, task: str, images: List[str], step_start_time: float) -> Any: + def _handle_max_steps_reached(self, task: str, images: 
list["PIL.Image.Image"], step_start_time: float) -> Any: final_answer = self.provide_final_answer(task, images) final_memory_step = ActionStep( step_number=self.step_number, error=AgentMaxStepsError("Reached max steps.", self.logger) @@ -386,124 +428,70 @@ def _handle_max_steps_reached(self, task: str, images: List[str], step_start_tim ) return final_answer - def planning_step(self, task, is_first_step: bool, step: int) -> None: - input_messages, facts_message, plan_message = ( - self._generate_initial_plan(task) if is_first_step else self._generate_updated_plan(task, step) - ) - self._record_planning_step(input_messages, facts_message, plan_message, is_first_step) - - def _generate_initial_plan(self, task: str) -> Tuple[ChatMessage, ChatMessage]: - input_messages = [ - { + def _generate_planning_step(self, task, is_first_step: bool, step: int) -> PlanningStep: + if is_first_step: + input_messages = [ + { + "role": MessageRole.USER, + "content": [ + { + "type": "text", + "text": populate_template( + self.prompt_templates["planning"]["initial_plan"], + variables={"task": task, "tools": self.tools, "managed_agents": self.managed_agents}, + ), + } + ], + } + ] + plan_message = self.model(input_messages, stop_sequences=[""]) + plan = textwrap.dedent( + f"""Here are the facts I know and the plan of action that I will follow to solve the task:\n```\n{plan_message.content}\n```""" + ) + else: + # Summary mode removes the system prompt and previous planning messages output by the model. + # Removing previous planning messages avoids influencing too much the new plan. 
+ memory_messages = self.write_memory_to_messages(summary_mode=True) + plan_update_pre = { + "role": MessageRole.SYSTEM, + "content": [ + { + "type": "text", + "text": populate_template( + self.prompt_templates["planning"]["update_plan_pre_messages"], variables={"task": task} + ), + } + ], + } + plan_update_post = { "role": MessageRole.USER, "content": [ { "type": "text", "text": populate_template( - self.prompt_templates["planning"]["initial_facts"], variables={"task": task} + self.prompt_templates["planning"]["update_plan_post_messages"], + variables={ + "task": task, + "tools": self.tools, + "managed_agents": self.managed_agents, + "remaining_steps": (self.max_steps - step), + }, ), } ], - }, - ] - facts_message = self.model(input_messages) - - message_prompt_plan = { - "role": MessageRole.USER, - "content": [ - { - "type": "text", - "text": populate_template( - self.prompt_templates["planning"]["initial_plan"], - variables={ - "task": task, - "tools": self.tools, - "managed_agents": self.managed_agents, - "answer_facts": facts_message.content, - }, - ), - } - ], - } - plan_message = self.model([message_prompt_plan], stop_sequences=[""]) - return input_messages, facts_message, plan_message - - def _generate_updated_plan(self, task: str, step: int) -> Tuple[ChatMessage, ChatMessage]: - # Do not take the system prompt message from the memory - # summary_mode=False: Do not take previous plan steps to avoid influencing the new plan - memory_messages = self.write_memory_to_messages()[1:] - facts_update_pre = { - "role": MessageRole.SYSTEM, - "content": [{"type": "text", "text": self.prompt_templates["planning"]["update_facts_pre_messages"]}], - } - facts_update_post = { - "role": MessageRole.USER, - "content": [{"type": "text", "text": self.prompt_templates["planning"]["update_facts_post_messages"]}], - } - input_messages = [facts_update_pre] + memory_messages + [facts_update_post] - facts_message = self.model(input_messages) - - update_plan_pre = { - "role": 
MessageRole.SYSTEM, - "content": [ - { - "type": "text", - "text": populate_template( - self.prompt_templates["planning"]["update_plan_pre_messages"], variables={"task": task} - ), - } - ], - } - update_plan_post = { - "role": MessageRole.USER, - "content": [ - { - "type": "text", - "text": populate_template( - self.prompt_templates["planning"]["update_plan_post_messages"], - variables={ - "task": task, - "tools": self.tools, - "managed_agents": self.managed_agents, - "facts_update": facts_message.content, - "remaining_steps": (self.max_steps - step), - }, - ), - } - ], - } - plan_message = self.model( - [update_plan_pre] + memory_messages + [update_plan_post], stop_sequences=[""] - ) - return input_messages, facts_message, plan_message - - def _record_planning_step( - self, input_messages: list, facts_message: ChatMessage, plan_message: ChatMessage, is_first_step: bool - ) -> None: - if is_first_step: - facts = textwrap.dedent(f"""Here are the facts that I know so far:\n```\n{facts_message.content}\n```""") - plan = textwrap.dedent( - f"""Here is the plan of action that I will follow to solve the task:\n```\n{plan_message.content}\n```""" - ) - log_message = "Initial plan" - else: - facts = textwrap.dedent( - f"""Here is the updated list of the facts that I know:\n```\n{facts_message.content}\n```""" - ) + } + input_messages = [plan_update_pre] + memory_messages + [plan_update_post] + plan_message = self.model(input_messages, stop_sequences=[""]) plan = textwrap.dedent( - f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere is my new/updated plan of action to solve the task:\n```\n{plan_message.content}\n```""" - ) - log_message = "Updated plan" - self.memory.steps.append( - PlanningStep( - model_input_messages=input_messages, - facts=facts, - plan=plan, - model_output_message_plan=plan_message, - model_output_message_facts=facts_message, + f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere are the facts I know 
and my new/updated plan of action to solve the task:\n```\n{plan_message.content}\n```""" ) + log_headline = "Initial plan" if is_first_step else "Updated plan" + self.logger.log(Rule(f"[bold]{log_headline}", style="orange"), Text(plan), level=LogLevel.INFO) + return PlanningStep( + model_input_messages=input_messages, + plan=plan, + model_output_message=plan_message, ) - self.logger.log(Rule(f"[bold]{log_message}", style="orange"), Text(plan), level=LogLevel.INFO) @property def logs(self): @@ -512,14 +500,19 @@ def logs(self): ) return [self.memory.system_prompt] + self.memory.steps - def initialize_system_prompt(self): + @abstractmethod + def initialize_system_prompt(self) -> str: """To be implemented in child classes""" - pass + ... + + def interrupt(self): + """Interrupts the agent execution.""" + self.interrupt_switch = True def write_memory_to_messages( self, - summary_mode: Optional[bool] = False, - ) -> List[Dict[str, str]]: + summary_mode: bool | None = False, + ) -> list[Message]: """ Reads past llm_outputs, actions, and observations or errors from the memory into a series of messages that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help @@ -534,7 +527,7 @@ def visualize(self): """Creates a rich tree visualization of the agent's structure.""" self.logger.visualize_agent_tree(self) - def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]: + def extract_action(self, model_output: str, split_token: str) -> tuple[str, str]: """ Parse action from the LLM output @@ -555,13 +548,13 @@ def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str] ) return rationale.strip(), action.strip() - def provide_final_answer(self, task: str, images: Optional[list[str]]) -> str: + def provide_final_answer(self, task: str, images: list["PIL.Image.Image"] | None = None) -> str: """ Provide the final answer to the task, based on the logs of the agent's interactions. 
Args: task (`str`): Task to perform. - images (`list[str]`, *optional*): Paths to image(s). + images (`list[PIL.Image.Image]`, *optional*): Image(s) objects. Returns: `str`: Final answer to the task. @@ -599,54 +592,8 @@ def provide_final_answer(self, task: str, images: Optional[list[str]]) -> str: except Exception as e: return f"Error in generating final LLM output:\n{e}" - def execute_tool_call(self, tool_name: str, arguments: Union[Dict[str, str], str]) -> Any: - """ - Execute tool with the provided input and returns the result. - This method replaces arguments with the actual values from the state if they refer to state variables. - - Args: - tool_name (`str`): Name of the Tool to execute (should be one from self.tools). - arguments (Dict[str, str]): Arguments passed to the Tool. - """ - available_tools = {**self.tools, **self.managed_agents} - if tool_name not in available_tools: - error_msg = f"Unknown tool {tool_name}, should be instead one of {list(available_tools.keys())}." - raise AgentExecutionError(error_msg, self.logger) - - try: - if isinstance(arguments, str): - if tool_name in self.managed_agents: - observation = available_tools[tool_name].__call__(arguments) - else: - observation = available_tools[tool_name].__call__(arguments, sanitize_inputs_outputs=True) - elif isinstance(arguments, dict): - for key, value in arguments.items(): - if isinstance(value, str) and value in self.state: - arguments[key] = self.state[value] - if tool_name in self.managed_agents: - observation = available_tools[tool_name].__call__(**arguments) - else: - observation = available_tools[tool_name].__call__(**arguments, sanitize_inputs_outputs=True) - else: - error_msg = f"Arguments passed to tool should be a dict or string: got a {type(arguments)}." 
- raise AgentExecutionError(error_msg, self.logger) - return observation - except Exception as e: - if tool_name in self.tools: - tool = self.tools[tool_name] - error_msg = ( - f"Error when executing tool {tool_name} with arguments {arguments}: {type(e).__name__}: {e}\nYou should only use this tool with a correct input.\n" - f"As a reminder, this tool's description is the following: '{tool.description}'.\nIt takes inputs: {tool.inputs} and returns output type {tool.output_type}" - ) - raise AgentExecutionError(error_msg, self.logger) - elif tool_name in self.managed_agents: - error_msg = ( - f"Error in calling team member: {e}\nYou should only ask this team member with a correct request.\n" - f"As a reminder, this team member's description is the following:\n{available_tools[tool_name]}" - ) - raise AgentExecutionError(error_msg, self.logger) - - def step(self, memory_step: ActionStep) -> Union[None, Any]: + @abstractmethod + def step(self, memory_step: ActionStep) -> None | Any: """To be implemented in children classes. Should return either None if the step is not final.""" pass @@ -661,7 +608,6 @@ def replay(self, detailed: bool = False): def __call__(self, task: str, **kwargs): """Adds additional prompting for the managed agent, runs it, and wraps the output. - This method is called only by a managed agent. """ full_task = populate_template( @@ -680,7 +626,7 @@ def __call__(self, task: str, **kwargs): answer += "\n" return answer - def save(self, output_dir: str, relative_path: Optional[str] = None): + def save(self, output_dir: str | Path, relative_path: str | None = None): """ Saves the relevant code files for your agent. This will copy the code of your agent in `output_dir` as well as autogenerate: @@ -693,7 +639,7 @@ def save(self, output_dir: str, relative_path: Optional[str] = None): code) Args: - output_dir (`str`): The folder in which you want to save your tool. + output_dir (`str` or `Path`): The folder in which you want to save your agent. 
""" make_init_file(output_dir) @@ -730,6 +676,7 @@ def save(self, output_dir: str, relative_path: Optional[str] = None): # Save agent dictionary to json agent_dict = self.to_dict() agent_dict["tools"] = [tool.name for tool in self.tools.values()] + agent_dict["managed_agents"] = {agent.name: agent.__class__.__name__ for agent in self.managed_agents.values()} with open(os.path.join(output_dir, "agent.json"), "w", encoding="utf-8") as f: json.dump(agent_dict, f, indent=4) @@ -798,8 +745,12 @@ def save(self, output_dir: str, relative_path: Optional[str] = None): with open(os.path.join(output_dir, "app.py"), "w", encoding="utf-8") as f: f.write(app_text + "\n") # Append newline at the end - def to_dict(self) -> Dict[str, Any]: - """Converts agent into a dictionary.""" + def to_dict(self) -> dict[str, Any]: + """Convert the agent to a dictionary representation. + + Returns: + `dict`: Dictionary representation of the agent. + """ # TODO: handle serializing step_callbacks and final_answer_checks for attr in ["final_answer_checks", "step_callbacks"]: if getattr(self, attr, None): @@ -817,14 +768,13 @@ def to_dict(self) -> Dict[str, Any]: ) agent_dict = { + "class": self.__class__.__name__, "tools": tool_dicts, "model": { "class": self.model.__class__.__name__, "data": self.model.to_dict(), }, - "managed_agents": { - managed_agent.name: managed_agent.__class__.__name__ for managed_agent in self.managed_agents.values() - }, + "managed_agents": [managed_agent.to_dict() for managed_agent in self.managed_agents.values()], "prompt_templates": self.prompt_templates, "max_steps": self.max_steps, "verbosity_level": int(self.logger.level), @@ -832,21 +782,58 @@ def to_dict(self) -> Dict[str, Any]: "planning_interval": self.planning_interval, "name": self.name, "description": self.description, - "requirements": list(requirements), + "requirements": sorted(requirements), } - if hasattr(self, "authorized_imports"): - agent_dict["authorized_imports"] = self.authorized_imports - if 
hasattr(self, "use_e2b_executor"): - agent_dict["use_e2b_executor"] = self.use_e2b_executor - if hasattr(self, "max_print_outputs_length"): - agent_dict["max_print_outputs_length"] = self.max_print_outputs_length return agent_dict + @classmethod + def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "MultiStepAgent": + """Create agent from a dictionary representation. + + Args: + agent_dict (`dict[str, Any]`): Dictionary representation of the agent. + **kwargs: Additional keyword arguments that will override agent_dict values. + + Returns: + `MultiStepAgent`: Instance of the agent class. + """ + # Load model + model_info = agent_dict["model"] + model_class = getattr(importlib.import_module("smolagents.models"), model_info["class"]) + model = model_class.from_dict(model_info["data"]) + # Load tools + tools = [] + for tool_info in agent_dict["tools"]: + tools.append(Tool.from_code(tool_info["code"])) + # Load managed agents + managed_agents = [] + for managed_agent_name, managed_agent_class_name in agent_dict["managed_agents"].items(): + managed_agent_class = getattr(importlib.import_module("smolagents.agents"), managed_agent_class_name) + managed_agents.append(managed_agent_class.from_dict(agent_dict["managed_agents"][managed_agent_name])) + # Extract base agent parameters + agent_args = { + "model": model, + "tools": tools, + "prompt_templates": agent_dict.get("prompt_templates"), + "max_steps": agent_dict.get("max_steps"), + "verbosity_level": agent_dict.get("verbosity_level"), + "grammar": agent_dict.get("grammar"), + "planning_interval": agent_dict.get("planning_interval"), + "name": agent_dict.get("name"), + "description": agent_dict.get("description"), + } + # Filter out None values to use defaults from __init__ + agent_args = {k: v for k, v in agent_args.items() if v is not None} + # Update with any additional kwargs + agent_args.update(kwargs) + # Create agent instance + return cls(**agent_args) + @classmethod def from_hub( cls, repo_id: str, - token: 
Optional[str] = None, + token: str | None = None, trust_remote_code: bool = False, **kwargs, ): @@ -897,54 +884,43 @@ def from_hub( return cls.from_folder(download_folder, **kwargs) @classmethod - def from_folder(cls, folder: Union[str, Path], **kwargs): + def from_folder(cls, folder: str | Path, **kwargs): """Loads an agent from a local folder. Args: folder (`str` or `Path`): The folder where the agent is saved. **kwargs: Additional keyword arguments that will be passed to the agent's init. """ + # Load agent.json folder = Path(folder) agent_dict = json.loads((folder / "agent.json").read_text()) - # Recursively get managed agents + # Load managed agents from their respective folders, recursively managed_agents = [] - for managed_agent_name, managed_agent_class in agent_dict["managed_agents"].items(): - agent_cls = getattr(importlib.import_module("smolagents.agents"), managed_agent_class) + for managed_agent_name, managed_agent_class_name in agent_dict["managed_agents"].items(): + agent_cls = getattr(importlib.import_module("smolagents.agents"), managed_agent_class_name) managed_agents.append(agent_cls.from_folder(folder / "managed_agents" / managed_agent_name)) + agent_dict["managed_agents"] = {} + # Load tools tools = [] for tool_name in agent_dict["tools"]: tool_code = (folder / "tools" / f"{tool_name}.py").read_text() - tools.append(Tool.from_code(tool_code)) + tools.append({"name": tool_name, "code": tool_code}) + agent_dict["tools"] = tools - model_class: Model = getattr(importlib.import_module("smolagents.models"), agent_dict["model"]["class"]) - model = model_class.from_dict(agent_dict["model"]["data"]) + # Add managed agents to kwargs to override the empty list in from_dict + if managed_agents: + kwargs["managed_agents"] = managed_agents - args = dict( - model=model, - tools=tools, - managed_agents=managed_agents, - name=agent_dict["name"], - description=agent_dict["description"], - max_steps=agent_dict["max_steps"], - 
planning_interval=agent_dict["planning_interval"], - grammar=agent_dict["grammar"], - verbosity_level=agent_dict["verbosity_level"], - ) - if cls.__name__ == "CodeAgent": - args["additional_authorized_imports"] = agent_dict["authorized_imports"] - args["use_e2b_executor"] = agent_dict["use_e2b_executor"] - args["max_print_outputs_length"] = agent_dict["max_print_outputs_length"] - args.update(kwargs) - return cls(**args) + return cls.from_dict(agent_dict, **kwargs) def push_to_hub( self, repo_id: str, commit_message: str = "Upload agent", - private: Optional[bool] = None, - token: Optional[Union[bool, str]] = None, + private: bool | None = None, + token: bool | str | None = None, create_pr: bool = False, ) -> str: """ @@ -1008,10 +984,10 @@ class ToolCallingAgent(MultiStepAgent): def __init__( self, - tools: List[Tool], - model: Callable[[List[Dict[str, str]]], ChatMessage], - prompt_templates: Optional[PromptTemplates] = None, - planning_interval: Optional[int] = None, + tools: list[Tool], + model: Callable[[list[dict[str, str]]], ChatMessage], + prompt_templates: PromptTemplates | None = None, + planning_interval: int | None = None, **kwargs, ): prompt_templates = prompt_templates or yaml.safe_load( @@ -1032,34 +1008,49 @@ def initialize_system_prompt(self) -> str: ) return system_prompt - def step(self, memory_step: ActionStep) -> Union[None, Any]: + def step(self, memory_step: ActionStep) -> None | Any: """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. Returns None if the step is not final. 
""" memory_messages = self.write_memory_to_messages() - self.input_messages = memory_messages + input_messages = memory_messages.copy() # Add new step in logs - memory_step.model_input_messages = memory_messages.copy() + memory_step.model_input_messages = input_messages try: - model_message: ChatMessage = self.model( - memory_messages, + chat_message: ChatMessage = self.model( + input_messages, + stop_sequences=["Observation:", "Calling tools:"], tools_to_call_from=list(self.tools.values()), - stop_sequences=["Observation:"], ) - memory_step.model_output_message = model_message - if model_message.tool_calls is None or len(model_message.tool_calls) == 0: - raise Exception("Model did not call any tools. Call `final_answer` tool to return a final answer.") - tool_call = model_message.tool_calls[0] - tool_name, tool_call_id = tool_call.function.name, tool_call.id - tool_arguments = tool_call.function.arguments + memory_step.model_output_message = chat_message + model_output = chat_message.content + self.logger.log_markdown( + content=model_output if model_output else str(chat_message.raw), + title="Output message of the LLM:", + level=LogLevel.DEBUG, + ) + memory_step.model_output_message.content = model_output + memory_step.model_output = model_output except Exception as e: - raise AgentGenerationError(f"Error in generating tool call with model:\n{e}", self.logger) from e + raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e + if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: + try: + chat_message = self.model.parse_tool_calls(chat_message) + except Exception as e: + raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) + else: + for tool_call in chat_message.tool_calls: + tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) + tool_call = chat_message.tool_calls[0] # type: ignore + tool_name, tool_call_id = tool_call.function.name, 
tool_call.id + tool_arguments = tool_call.function.arguments + memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] # Execute @@ -1115,6 +1106,79 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]: memory_step.observations = updated_information return None + def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: + """Replace string values in arguments with their corresponding state values if they exist.""" + if isinstance(arguments, dict): + return { + key: self.state.get(value, value) if isinstance(value, str) else value + for key, value in arguments.items() + } + return arguments + + def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: + """ + Execute a tool or managed agent with the provided arguments. + + The arguments are replaced with the actual values from the state if they refer to state variables. + + Args: + tool_name (`str`): Name of the tool or managed agent to execute. + arguments (dict[str, str] | str): Arguments passed to the tool call. 
+ """ + # Check if the tool exists + available_tools = {**self.tools, **self.managed_agents} + if tool_name not in available_tools: + raise AgentToolExecutionError( + f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger + ) + + # Get the tool and substitute state variables in arguments + tool = available_tools[tool_name] + arguments = self._substitute_state_variables(arguments) + is_managed_agent = tool_name in self.managed_agents + + try: + # Call tool with appropriate arguments + if isinstance(arguments, dict): + return tool(**arguments) if is_managed_agent else tool(**arguments, sanitize_inputs_outputs=True) + elif isinstance(arguments, str): + return tool(arguments) if is_managed_agent else tool(arguments, sanitize_inputs_outputs=True) + else: + raise TypeError(f"Unsupported arguments type: {type(arguments)}") + + except TypeError as e: + # Handle invalid arguments + description = getattr(tool, "description", "No description") + if is_managed_agent: + error_msg = ( + f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" + "You should call this team member with a valid request.\n" + f"Team member description: {description}" + ) + else: + error_msg = ( + f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" + "You should call this tool with correct input arguments.\n" + f"Expected inputs: {json.dumps(tool.inputs)}\n" + f"Returns output type: {tool.output_type}\n" + f"Tool description: '{description}'" + ) + raise AgentToolCallError(error_msg, self.logger) from e + + except Exception as e: + # Handle execution errors + if is_managed_agent: + error_msg = ( + f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" + "Please try again or request to another team member" + ) + else: + error_msg = ( + f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" + "Please try 
again or use another tool" + ) + raise AgentToolExecutionError(error_msg, self.logger) from e + class CodeAgent(MultiStepAgent): """ @@ -1122,32 +1186,34 @@ class CodeAgent(MultiStepAgent): Args: tools (`list[Tool]`): [`Tool`]s that the agent can use. - model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions. + model (`Model`): Model that will generate the agent's actions. prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates. grammar (`dict[str, str]`, *optional*): Grammar used to parse the LLM output. additional_authorized_imports (`list[str]`, *optional*): Additional authorized imports for the agent. planning_interval (`int`, *optional*): Interval at which the agent will run a planning step. - use_e2b_executor (`bool`, default `False`): Whether to use the E2B executor for remote code execution. + executor_type (`str`, default `"local"`): Which executor type to use between `"local"`, `"e2b"`, or `"docker"`. + executor_kwargs (`dict`, *optional*): Additional arguments to pass to initialize the executor. max_print_outputs_length (`int`, *optional*): Maximum length of the print outputs. + stream_outputs (`bool`, *optional*, default `False`): Whether to stream outputs during execution. **kwargs: Additional keyword arguments. 
- """ def __init__( self, - tools: List[Tool], - model: Callable[[List[Dict[str, str]]], ChatMessage], - prompt_templates: Optional[PromptTemplates] = None, - grammar: Optional[Dict[str, str]] = None, - additional_authorized_imports: Optional[List[str]] = None, - planning_interval: Optional[int] = None, - use_e2b_executor: bool = False, - max_print_outputs_length: Optional[int] = None, + tools: list[Tool], + model: Model, + prompt_templates: PromptTemplates | None = None, + grammar: dict[str, str] | None = None, + additional_authorized_imports: list[str] | None = None, + planning_interval: int | None = None, + executor_type: str | None = "local", + executor_kwargs: dict[str, Any] | None = None, + max_print_outputs_length: int | None = None, + stream_outputs: bool = False, **kwargs, ): self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else [] - self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports)) - self.use_e2b_executor = use_e2b_executor + self.authorized_imports = sorted(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports)) self.max_print_outputs_length = max_print_outputs_length prompt_templates = prompt_templates or yaml.safe_load( importlib.resources.files("smolagents.prompts").joinpath("code_agent.yaml").read_text() @@ -1160,30 +1226,36 @@ def __init__( planning_interval=planning_interval, **kwargs, ) + self.stream_outputs = stream_outputs + if self.stream_outputs and not hasattr(self.model, "generate_stream"): + raise ValueError( + "`stream_outputs` is set to True, but the model class implements no `generate_stream` method." + ) if "*" in self.additional_authorized_imports: self.logger.log( "Caution: you set an authorization for all imports, meaning your agent can decide to import any package it deems necessary. 
This might raise issues if the package is not installed in your environment.", - 0, - ) - - if use_e2b_executor and len(self.managed_agents) > 0: - raise Exception( - f"You passed both {use_e2b_executor=} and some managed agents. Managed agents is not yet supported with remote code execution." - ) - - all_tools = {**self.tools, **self.managed_agents} - if use_e2b_executor: - self.python_executor = E2BExecutor( - self.additional_authorized_imports, - list(all_tools.values()), - self.logger, - ) - else: - self.python_executor = LocalPythonInterpreter( - self.additional_authorized_imports, - all_tools, - max_print_outputs_length=max_print_outputs_length, + level=LogLevel.INFO, ) + self.executor_type = executor_type or "local" + self.executor_kwargs = executor_kwargs or {} + self.python_executor = self.create_python_executor() + + def create_python_executor(self) -> PythonExecutor: + match self.executor_type: + case "e2b" | "docker": + if self.managed_agents: + raise Exception("Managed agents are not yet supported with remote code execution.") + if self.executor_type == "e2b": + return E2BExecutor(self.additional_authorized_imports, self.logger, **self.executor_kwargs) + else: + return DockerExecutor(self.additional_authorized_imports, self.logger, **self.executor_kwargs) + case "local": + return LocalPythonExecutor( + self.additional_authorized_imports, + max_print_outputs_length=self.max_print_outputs_length, + ) + case _: # if applicable + raise ValueError(f"Unsupported executor type: {self.executor_type}") def initialize_system_prompt(self) -> str: system_prompt = populate_template( @@ -1200,37 +1272,60 @@ def initialize_system_prompt(self) -> str: ) return system_prompt - def step(self, memory_step: ActionStep) -> Union[None, Any]: + def step(self, memory_step: ActionStep) -> None | Any: """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. Returns None if the step is not final. 
""" memory_messages = self.write_memory_to_messages() - self.input_messages = memory_messages.copy() - - # Add new step in logs - memory_step.model_input_messages = memory_messages.copy() + input_messages = memory_messages.copy() + ### Generate model output ### + memory_step.model_input_messages = input_messages try: additional_args = {"grammar": self.grammar} if self.grammar is not None else {} - chat_message: ChatMessage = self.model( - self.input_messages, - stop_sequences=["", "Observation:"], - **additional_args, - ) - memory_step.model_output_message = chat_message - model_output = chat_message.content + if self.stream_outputs: + output_stream = self.model.generate_stream( + input_messages, + stop_sequences=["", "Observation:", "Calling tools:"], + **additional_args, + ) + output_text = "" + with Live("", console=self.logger.console, vertical_overflow="visible") as live: + for event in output_stream: + if event.content is not None: + output_text += event.content + live.update(Markdown(output_text)) + + model_output = output_text + chat_message = ChatMessage(role="assistant", content=model_output) + memory_step.model_output_message = chat_message + model_output = chat_message.content + else: + chat_message: ChatMessage = self.model( + input_messages, + stop_sequences=["", "Observation:", "Calling tools:"], + **additional_args, + ) + memory_step.model_output_message = chat_message + model_output = chat_message.content + self.logger.log_markdown( + content=model_output, + title="Output message of the LLM:", + level=LogLevel.DEBUG, + ) + + # This adds sequence to the history. + # This will nudge ulterior LLM calls to finish with , thus efficiently stopping generation. 
+ if model_output and model_output.strip().endswith("```"): + model_output += "" + memory_step.model_output_message.content = model_output + memory_step.model_output = model_output except Exception as e: raise AgentGenerationError(f"Error in generating model output:\n{e}", self.logger) from e - self.logger.log_markdown( - content=model_output, - title="Output message of the LLM:", - level=LogLevel.DEBUG, - ) - - # Parse + ### Parse output ### try: code_action = fix_final_answer_code(parse_code_blobs(model_output)) except Exception as e: @@ -1245,14 +1340,11 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]: ) ] - # Execute + ### Execute action ### self.logger.log_code(title="Executing parsed code:", content=code_action, level=LogLevel.INFO) is_final_answer = False try: - output, execution_logs, is_final_answer = self.python_executor( - code_action, - self.state, - ) + output, execution_logs, is_final_answer = self.python_executor(code_action) execution_outputs_console = [] if len(execution_logs) > 0: execution_outputs_console += [ @@ -1291,3 +1383,41 @@ def step(self, memory_step: ActionStep) -> Union[None, Any]: self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO) memory_step.action_output = output return output if is_final_answer else None + + def to_dict(self) -> dict[str, Any]: + """Convert the agent to a dictionary representation. + + Returns: + `dict`: Dictionary representation of the agent. + """ + agent_dict = super().to_dict() + agent_dict["authorized_imports"] = self.authorized_imports + agent_dict["executor_type"] = self.executor_type + agent_dict["executor_kwargs"] = self.executor_kwargs + agent_dict["max_print_outputs_length"] = self.max_print_outputs_length + return agent_dict + + @classmethod + def from_dict(cls, agent_dict: dict[str, Any], **kwargs) -> "CodeAgent": + """Create CodeAgent from a dictionary representation. + + Args: + agent_dict (`dict[str, Any]`): Dictionary representation of the agent. 
+ **kwargs: Additional keyword arguments that will override agent_dict values. + + Returns: + `CodeAgent`: Instance of the CodeAgent class. + """ + # Add CodeAgent-specific parameters to kwargs + code_agent_kwargs = { + "additional_authorized_imports": agent_dict.get("authorized_imports"), + "executor_type": agent_dict.get("executor_type"), + "executor_kwargs": agent_dict.get("executor_kwargs"), + "max_print_outputs_length": agent_dict.get("max_print_outputs_length"), + } + # Filter out None values + code_agent_kwargs = {k: v for k, v in code_agent_kwargs.items() if v is not None} + # Update with any additional kwargs + code_agent_kwargs.update(kwargs) + # Call the parent class's from_dict method + return super().from_dict(agent_dict, **code_agent_kwargs) diff --git a/src/smolagents/cli.py b/src/smolagents/cli.py index bcf984532..ccb8295ef 100644 --- a/src/smolagents/cli.py +++ b/src/smolagents/cli.py @@ -19,15 +19,15 @@ from dotenv import load_dotenv -from smolagents import CodeAgent, HfApiModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel +from smolagents import CodeAgent, InferenceClientModel, LiteLLMModel, Model, OpenAIServerModel, Tool, TransformersModel from smolagents.default_tools import TOOL_MAPPING leopard_prompt = "How many seconds would it take for a leopard at full speed to run through Pont des Arts?" 
-def parse_arguments(description): - parser = argparse.ArgumentParser(description=description) +def parse_arguments(): + parser = argparse.ArgumentParser(description="Run a CodeAgent with all specified parameters") parser.add_argument( "prompt", type=str, @@ -38,8 +38,8 @@ def parse_arguments(description): parser.add_argument( "--model-type", type=str, - default="HfApiModel", - help="The model type to use (e.g., HfApiModel, OpenAIServerModel, LiteLLMModel, TransformersModel)", + default="InferenceClientModel", + help="The model type to use (e.g., InferenceClientModel, OpenAIServerModel, LiteLLMModel, TransformersModel)", ) parser.add_argument( "--model-id", @@ -66,6 +66,12 @@ def parse_arguments(description): help="The verbosity level, as an int in [0, 1, 2].", ) group = parser.add_argument_group("api options", "Options for API-based model types") + group.add_argument( + "--provider", + type=str, + default=None, + help="The inference provider to use for the model", + ) group.add_argument( "--api-base", type=str, @@ -79,7 +85,13 @@ def parse_arguments(description): return parser.parse_args() -def load_model(model_type: str, model_id: str, api_base: str | None, api_key: str | None) -> Model: +def load_model( + model_type: str, + model_id: str, + api_base: str | None = None, + api_key: str | None = None, + provider: str | None = None, +) -> Model: if model_type == "OpenAIServerModel": return OpenAIServerModel( api_key=api_key or os.getenv("FIREWORKS_API_KEY"), @@ -89,29 +101,37 @@ def load_model(model_type: str, model_id: str, api_base: str | None, api_key: st elif model_type == "LiteLLMModel": return LiteLLMModel( model_id=model_id, - api_key=api_key or os.getenv("OPENAI_API_KEY"), + api_key=api_key, api_base=api_base, ) elif model_type == "TransformersModel": - return TransformersModel(model_id=model_id, device_map="auto", flatten_messages_as_text=False) - elif model_type == "HfApiModel": - return HfApiModel( - token=api_key or os.getenv("HF_API_KEY"), + return 
TransformersModel(model_id=model_id, device_map="auto") + elif model_type == "InferenceClientModel": + return InferenceClientModel( model_id=model_id, + token=api_key or os.getenv("HF_API_KEY"), + provider=provider, ) else: raise ValueError(f"Unsupported model type: {model_type}") -def main(): +def run_smolagent( + prompt: str, + tools: list[str], + model_type: str, + model_id: str, + api_base: str | None = None, + api_key: str | None = None, + imports: list[str] | None = None, + provider: str | None = None, +) -> None: load_dotenv() - args = parse_arguments(description="Run a CodeAgent with all specified parameters") - - model = load_model(args.model_type, args.model_id, args.api_base, args.api_key) + model = load_model(model_type, model_id, api_base=api_base, api_key=api_key, provider=provider) available_tools = [] - for tool_name in args.tools: + for tool_name in tools: if "/" in tool_name: available_tools.append(Tool.from_space(tool_name)) else: @@ -120,10 +140,24 @@ def main(): else: raise ValueError(f"Tool {tool_name} is not recognized either as a default tool or a Space.") - print(f"Running agent with these tools: {args.tools}") - agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=args.imports) + print(f"Running agent with these tools: {tools}") + agent = CodeAgent(tools=available_tools, model=model, additional_authorized_imports=imports) + + agent.run(prompt) - agent.run(args.prompt) + +def main() -> None: + args = parse_arguments() + run_smolagent( + args.prompt, + args.tools, + args.model_type, + args.model_id, + provider=args.provider, + api_base=args.api_base, + api_key=args.api_key, + imports=args.imports, + ) if __name__ == "__main__": diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py index 2ea7834f6..d12a38d5a 100644 --- a/src/smolagents/default_tools.py +++ b/src/smolagents/default_tools.py @@ -14,9 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -import re from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from .local_python_executor import ( BASE_BUILTIN_MODULES, @@ -29,7 +28,7 @@ @dataclass class PreTool: name: str - inputs: Dict[str, str] + inputs: dict[str, str] output_type: type task: str description: str @@ -57,7 +56,7 @@ def __init__(self, *args, authorized_imports=None, **kwargs): "type": "string", "description": ( "The code snippet to evaluate. All variables used in this snippet must be defined in this same snippet, " - f"else you will get an error. This code can only import the following python libraries: {authorized_imports}." + f"else you will get an error. This code can only import the following python libraries: {self.authorized_imports}." ), } } @@ -138,7 +137,7 @@ class GoogleSearchTool(Tool): output_type = "string" def __init__(self, provider: str = "serpapi"): - super().__init__(self) + super().__init__() import os self.provider = provider @@ -152,7 +151,7 @@ def __init__(self, provider: str = "serpapi"): if self.api_key is None: raise ValueError(f"Missing API key. 
Make sure you have '{api_key_env_name}' in your env variables.") - def forward(self, query: str, filter_year: Optional[int] = None) -> str: + def forward(self, query: str, filter_year: int | None = None) -> str: import requests if self.provider == "serpapi": @@ -224,8 +223,14 @@ class VisitWebpageTool(Tool): } output_type = "string" + def __init__(self, max_output_length: int = 40000): + super().__init__() + self.max_output_length = max_output_length + def forward(self, url: str) -> str: try: + import re + import requests from markdownify import markdownify from requests.exceptions import RequestException @@ -246,7 +251,7 @@ def forward(self, url: str) -> str: # Remove multiple line breaks markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content) - return truncate_content(markdown_content, 10000) + return truncate_content(markdown_content, self.max_output_length) except requests.exceptions.Timeout: return "The request timed out. Please try again later or check the URL." @@ -256,6 +261,102 @@ def forward(self, url: str) -> str: return f"An unexpected error occurred: {str(e)}" +class WikipediaSearchTool(Tool): + """ + WikipediaSearchTool searches Wikipedia and returns a summary or full text of the given topic, along with the page URL. + + Attributes: + user_agent (str): A custom user-agent string to identify the project. This is required as per Wikipedia API policies, read more here: http://github.com/martin-majlis/Wikipedia-API/blob/master/README.rst + language (str): The language in which to retrieve Wikipedia articles. + http://meta.wikimedia.org/wiki/List_of_Wikipedias + content_type (str): Defines the content to fetch. Can be "summary" for a short summary or "text" for the full article. + extract_format (str): Defines the output format. Can be `"WIKI"` or `"HTML"`. 
+ + Example: + >>> from smolagents import CodeAgent, InferenceClientModel, WikipediaSearchTool + >>> agent = CodeAgent( + >>> tools=[ + >>> WikipediaSearchTool( + >>> user_agent="MyResearchBot (myemail@example.com)", + >>> language="en", + >>> content_type="summary", # or "text" + >>> extract_format="WIKI", + >>> ) + >>> ], + >>> model=InferenceClientModel(), + >>> ) + >>> agent.run("Python_(programming_language)") + """ + + name = "wikipedia_search" + description = "Searches Wikipedia and returns a summary or full text of the given topic, along with the page URL." + inputs = { + "query": { + "type": "string", + "description": "The topic to search on Wikipedia.", + } + } + output_type = "string" + + def __init__( + self, + user_agent: str = "Smolagents (myemail@example.com)", + language: str = "en", + content_type: str = "text", + extract_format: str = "WIKI", + ): + super().__init__() + try: + import wikipediaapi + except ImportError as e: + raise ImportError( + "You must install `wikipedia-api` to run this tool: for instance run `pip install wikipedia-api`" + ) from e + if not user_agent: + raise ValueError("User-agent is required. Provide a meaningful identifier for your project.") + + self.user_agent = user_agent + self.language = language + self.content_type = content_type + + # Map string format to wikipediaapi.ExtractFormat + extract_format_map = { + "WIKI": wikipediaapi.ExtractFormat.WIKI, + "HTML": wikipediaapi.ExtractFormat.HTML, + } + + if extract_format not in extract_format_map: + raise ValueError("Invalid extract_format. Choose between 'WIKI' or 'HTML'.") + + self.extract_format = extract_format_map[extract_format] + + self.wiki = wikipediaapi.Wikipedia( + user_agent=self.user_agent, language=self.language, extract_format=self.extract_format + ) + + def forward(self, query: str) -> str: + try: + page = self.wiki.page(query) + + if not page.exists(): + return f"No Wikipedia page found for '{query}'. Try a different query." 
+ + title = page.title + url = page.fullurl + + if self.content_type == "summary": + text = page.summary + elif self.content_type == "text": + text = page.text + else: + return "โš ๏ธ Invalid `content_type`. Use either 'summary' or 'text'." + + return f"โœ… **Wikipedia Page:** {title}\n\n**Content:** {text}\n\n๐Ÿ”— **Read more:** {url}" + + except Exception as e: + return f"Error fetching Wikipedia summary: {str(e)}" + + class SpeechToTextTool(PipelineTool): default_checkpoint = "openai/whisper-large-v3-turbo" description = "This is a tool that transcribes an audio into text. It returns the transcribed text." @@ -307,5 +408,6 @@ def decode(self, outputs): "DuckDuckGoSearchTool", "GoogleSearchTool", "VisitWebpageTool", + "WikipediaSearchTool", "SpeechToTextTool", ] diff --git a/src/smolagents/e2b_executor.py b/src/smolagents/e2b_executor.py deleted file mode 100644 index 10b0170ee..000000000 --- a/src/smolagents/e2b_executor.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import base64 -import pickle -import re -import textwrap -from io import BytesIO -from typing import Any, List, Tuple - -from PIL import Image - -from .tool_validation import validate_tool_attributes -from .tools import Tool -from .utils import BASE_BUILTIN_MODULES, instance_to_source - - -try: - from dotenv import load_dotenv - - load_dotenv() -except ModuleNotFoundError: - pass - - -class E2BExecutor: - def __init__(self, additional_imports: List[str], tools: List[Tool], logger): - self.logger = logger - try: - from e2b_code_interpreter import Sandbox - except ModuleNotFoundError: - raise ModuleNotFoundError( - """Please install 'e2b' extra to use E2BExecutor: `pip install "smolagents[e2b]"`""" - ) - self.logger = logger - self.logger.log("Initializing E2B executor, hold on...") - - self.custom_tools = {} - self.final_answer = False - self.final_answer_pattern = re.compile(r"final_answer\((.*?)\)") - self.sbx = Sandbox() # "qywp2ctmu2q7jzprcf4j") - # TODO: validate installing agents package or not - # print("Installing agents package on remote executor...") - # self.sbx.commands.run( - # "pip install git+https://github.com/huggingface/smolagents.git", - # timeout=300 - # ) - # print("Installation of agents package finished.") - additional_imports = additional_imports + ["smolagents"] - if len(additional_imports) > 0: - execution = self.sbx.commands.run("pip install " + " ".join(additional_imports)) - if execution.error: - raise Exception(f"Error installing dependencies: {execution.error}") - else: - logger.log(f"Installation of {additional_imports} succeeded!", 0) - - tool_codes = [] - for tool in tools: - validate_tool_attributes(tool.__class__, check_imports=False) - tool_code = instance_to_source(tool, base_cls=Tool) - tool_code = tool_code.replace("from smolagents.tools import Tool", "") - tool_code += f"\n{tool.name} = {tool.__class__.__name__}()\n" - tool_codes.append(tool_code) - - tool_definition_code = "\n".join([f"import {module}" for module in 
BASE_BUILTIN_MODULES]) - tool_definition_code += textwrap.dedent( - """ - class Tool: - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) - - def forward(self, *args, **kwargs): - pass # to be implemented in child class - """ - ) - tool_definition_code += "\n\n".join(tool_codes) - - tool_definition_execution = self.run_code_raise_errors(tool_definition_code) - self.logger.log(tool_definition_execution.logs) - - def run_code_raise_errors(self, code: str): - if self.final_answer_pattern.search(code) is not None: - self.final_answer = True - execution = self.sbx.run_code( - code, - ) - if execution.error: - execution_logs = "\n".join([str(log) for log in execution.logs.stdout]) - logs = execution_logs - logs += "Executing code yielded an error:" - logs += execution.error.name - logs += execution.error.value - logs += execution.error.traceback - raise ValueError(logs) - return execution - - def __call__(self, code_action: str, additional_args: dict) -> Tuple[Any, Any]: - if len(additional_args) > 0: - # Pickle additional_args to server - import tempfile - - with tempfile.NamedTemporaryFile() as f: - pickle.dump(additional_args, f) - f.flush() - with open(f.name, "rb") as file: - self.sbx.files.write("/home/state.pkl", file) - remote_unloading_code = """import pickle -import os -print("File path", os.path.getsize('/home/state.pkl')) -with open('/home/state.pkl', 'rb') as f: - pickle_dict = pickle.load(f) -locals().update({key: value for key, value in pickle_dict.items()}) -""" - execution = self.run_code_raise_errors(remote_unloading_code) - execution_logs = "\n".join([str(log) for log in execution.logs.stdout]) - self.logger.log(execution_logs, 1) - - execution = self.run_code_raise_errors(code_action) - execution_logs = "\n".join([str(log) for log in execution.logs.stdout]) - if not execution.results: - return None, execution_logs, self.final_answer - else: - for result in execution.results: - if result.is_main_result: - for attribute_name in 
["jpeg", "png"]: - if getattr(result, attribute_name) is not None: - image_output = getattr(result, attribute_name) - decoded_bytes = base64.b64decode(image_output.encode("utf-8")) - return Image.open(BytesIO(decoded_bytes)), execution_logs, self.final_answer - for attribute_name in [ - "chart", - "data", - "html", - "javascript", - "json", - "latex", - "markdown", - "pdf", - "svg", - "text", - ]: - if getattr(result, attribute_name) is not None: - return getattr(result, attribute_name), execution_logs, self.final_answer - if self.final_answer: - raise ValueError("No main result returned by executor!") - return None, execution_logs, False - - -__all__ = ["E2BExecutor"] diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index 11094a52c..83fbaff3d 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -16,23 +16,39 @@ import os import re import shutil -from typing import Optional -from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types -from smolagents.agents import ActionStep, MultiStepAgent -from smolagents.memory import MemoryStep +from smolagents.agent_types import AgentAudio, AgentImage, AgentText +from smolagents.agents import MultiStepAgent, PlanningStep +from smolagents.memory import ActionStep, FinalAnswerStep, MemoryStep from smolagents.utils import _is_package_available +def get_step_footnote_content(step_log: MemoryStep, step_name: str) -> str: + """Get a footnote string for a step log with duration and token information""" + step_footnote = f"**{step_name}**" + if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"): + token_str = f" | Input tokens:{step_log.input_token_count:,} | Output tokens: {step_log.output_token_count:,}" + step_footnote += token_str + if hasattr(step_log, "duration"): + step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None + step_footnote += step_duration + 
step_footnote_content = f"""{step_footnote} """ + return step_footnote_content + + def pull_messages_from_step( step_log: MemoryStep, ): """Extract ChatMessage objects from agent steps with proper nesting""" + if not _is_package_available("gradio"): + raise ModuleNotFoundError( + "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`" + ) import gradio as gr if isinstance(step_log, ActionStep): # Output the step number - step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else "" + step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else "Step" yield gr.ChatMessage(role="assistant", content=f"**{step_number}**") # First yield the thought/reasoning from the LLM @@ -74,76 +90,98 @@ def pull_messages_from_step( metadata={ "title": f"๐Ÿ› ๏ธ Used tool {first_tool_call.name}", "id": parent_id, - "status": "pending", + "status": "done", }, ) yield parent_message_tool - # Nesting execution logs under the tool call if they exist - if hasattr(step_log, "observations") and ( - step_log.observations is not None and step_log.observations.strip() - ): # Only yield execution logs if there's actual content - log_content = step_log.observations.strip() - if log_content: - log_content = re.sub(r"^Execution logs:\s*", "", log_content) - yield gr.ChatMessage( - role="assistant", - content=f"{log_content}", - metadata={"title": "๐Ÿ“ Execution Logs", "parent_id": parent_id, "status": "done"}, - ) - - # Nesting any errors under the tool call - if hasattr(step_log, "error") and step_log.error is not None: + # Display execution logs if they exist + if hasattr(step_log, "observations") and ( + step_log.observations is not None and step_log.observations.strip() + ): # Only yield execution logs if there's actual content + log_content = step_log.observations.strip() + if log_content: + log_content = re.sub(r"^Execution logs:\s*", "", log_content) yield gr.ChatMessage( role="assistant", - 
content=str(step_log.error), - metadata={"title": "๐Ÿ’ฅ Error", "parent_id": parent_id, "status": "done"}, + content=f"```bash\n{log_content}\n", + metadata={"title": "๐Ÿ“ Execution Logs", "status": "done"}, ) - # Update parent message metadata to done status without yielding a new message - parent_message_tool.metadata["status"] = "done" + # Display any errors + if hasattr(step_log, "error") and step_log.error is not None: + yield gr.ChatMessage( + role="assistant", + content=str(step_log.error), + metadata={"title": "๐Ÿ’ฅ Error", "status": "done"}, + ) + + # Update parent message metadata to done status without yielding a new message + if getattr(step_log, "observations_images", []): + for image in step_log.observations_images: + path_image = AgentImage(image).to_string() + yield gr.ChatMessage( + role="assistant", + content={"path": path_image, "mime_type": f"image/{path_image.split('.')[-1]}"}, + metadata={"title": "๐Ÿ–ผ๏ธ Output Image", "status": "done"}, + ) # Handle standalone errors but not from tool calls - elif hasattr(step_log, "error") and step_log.error is not None: + if hasattr(step_log, "error") and step_log.error is not None: yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "๐Ÿ’ฅ Error"}) - # Calculate duration and token information - step_footnote = f"{step_number}" - if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"): - token_str = ( - f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}" + yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, step_number)) + yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"}) + + elif isinstance(step_log, PlanningStep): + yield gr.ChatMessage(role="assistant", content="**Planning step**") + yield gr.ChatMessage(role="assistant", content=step_log.plan) + yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, 
"Planning step")) + yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"}) + + elif isinstance(step_log, FinalAnswerStep): + final_answer = step_log.final_answer + if isinstance(final_answer, AgentText): + yield gr.ChatMessage( + role="assistant", + content=f"**Final answer:**\n{final_answer.to_string()}\n", + ) + elif isinstance(final_answer, AgentImage): + yield gr.ChatMessage( + role="assistant", + content={"path": final_answer.to_string(), "mime_type": "image/png"}, ) - step_footnote += token_str - if hasattr(step_log, "duration"): - step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None - step_footnote += step_duration - step_footnote = f"""{step_footnote} """ - yield gr.ChatMessage(role="assistant", content=f"{step_footnote}") - yield gr.ChatMessage(role="assistant", content="-----") + elif isinstance(final_answer, AgentAudio): + yield gr.ChatMessage( + role="assistant", + content={"path": final_answer.to_string(), "mime_type": "audio/wav"}, + ) + else: + yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}") + + else: + raise ValueError(f"Unsupported step type: {type(step_log)}") def stream_to_gradio( agent, task: str, + task_images: list | None = None, reset_agent_memory: bool = False, - additional_args: Optional[dict] = None, + additional_args: dict | None = None, ): """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages.""" - if not _is_package_available("gradio"): - raise ModuleNotFoundError( - "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`" - ) - import gradio as gr - total_input_tokens = 0 total_output_tokens = 0 - for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args): + for step_log in agent.run( + task, images=task_images, stream=True, reset=reset_agent_memory, additional_args=additional_args + ): # Track 
tokens if model provides them if getattr(agent.model, "last_input_token_count", None) is not None: total_input_tokens += agent.model.last_input_token_count total_output_tokens += agent.model.last_output_token_count - if isinstance(step_log, ActionStep): + if isinstance(step_log, (ActionStep, PlanningStep)): step_log.input_token_count = agent.model.last_input_token_count step_log.output_token_count = agent.model.last_output_token_count @@ -152,27 +190,6 @@ def stream_to_gradio( ): yield message - final_answer = step_log # Last log is the run's final_answer - final_answer = handle_agent_output_types(final_answer) - - if isinstance(final_answer, AgentText): - yield gr.ChatMessage( - role="assistant", - content=f"**Final answer:**\n{final_answer.to_string()}\n", - ) - elif isinstance(final_answer, AgentImage): - yield gr.ChatMessage( - role="assistant", - content={"path": final_answer.to_string(), "mime_type": "image/png"}, - ) - elif isinstance(final_answer, AgentAudio): - yield gr.ChatMessage( - role="assistant", - content={"path": final_answer.to_string(), "mime_type": "audio/wav"}, - ) - else: - yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}") - class GradioUI: """A one-line interface to launch your agent in Gradio""" @@ -184,19 +201,32 @@ def __init__(self, agent: MultiStepAgent, file_upload_folder: str | None = None) ) self.agent = agent self.file_upload_folder = file_upload_folder + self.name = getattr(agent, "name") or "Agent interface" + self.description = getattr(agent, "description", None) if self.file_upload_folder is not None: if not os.path.exists(file_upload_folder): os.mkdir(file_upload_folder) - def interact_with_agent(self, prompt, messages): + def interact_with_agent(self, prompt, messages, session_state): import gradio as gr - messages.append(gr.ChatMessage(role="user", content=prompt)) - yield messages - for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False): - messages.append(msg) + # 
Get the agent type from the template agent + if "agent" not in session_state: + session_state["agent"] = self.agent + + try: + messages.append(gr.ChatMessage(role="user", content=prompt)) + yield messages + + for msg in stream_to_gradio(session_state["agent"], task=prompt, reset_agent_memory=False): + messages.append(msg) + yield messages + + yield messages + except Exception as e: + print(f"Error in interaction: {str(e)}") + messages.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}")) yield messages - yield messages def upload_file(self, file, file_uploads_log, allowed_file_types=None): """ @@ -227,6 +257,8 @@ def upload_file(self, file, file_uploads_log, allowed_file_types=None): return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path] def log_user_message(self, text_input, file_uploads_log): + import gradio as gr + return ( text_input + ( @@ -235,14 +267,56 @@ def log_user_message(self, text_input, file_uploads_log): else "" ), "", + gr.Button(interactive=False), ) - def launch(self, share: bool = False, **kwargs): + def launch(self, share: bool = True, **kwargs): + self.create_app().launch(debug=True, share=share, **kwargs) + + def create_app(self): import gradio as gr - with gr.Blocks(fill_height=True) as demo: + with gr.Blocks(theme="ocean", fill_height=True) as demo: + # Add session state to store session-specific data + session_state = gr.State({}) stored_messages = gr.State([]) file_uploads_log = gr.State([]) + + with gr.Sidebar(): + gr.Markdown( + f"# {self.name.replace('_', ' ').capitalize()}" + "\n> This web ui allows you to interact with a `smolagents` agent that can use tools and execute steps to complete tasks." 
+ + (f"\n\n**Agent description:**\n{self.description}" if self.description else "") + ) + + with gr.Group(): + gr.Markdown("**Your request**", container=True) + text_input = gr.Textbox( + lines=3, + label="Chat Message", + container=False, + placeholder="Enter your prompt here and press Shift+Enter or press the button", + ) + submit_btn = gr.Button("Submit", variant="primary") + + # If an upload folder is provided, enable the upload feature + if self.file_upload_folder is not None: + upload_file = gr.File(label="Upload a file") + upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False) + upload_file.change( + self.upload_file, + [upload_file, file_uploads_log], + [upload_status, file_uploads_log], + ) + + gr.HTML("

Powered by:

") + with gr.Row(): + gr.HTML("""""") + + # Main chat interface chatbot = gr.Chatbot( label="Agent", type="messages", @@ -253,23 +327,39 @@ def launch(self, share: bool = False, **kwargs): resizeable=True, scale=1, ) - # If an upload folder is provided, enable the upload feature - if self.file_upload_folder is not None: - upload_file = gr.File(label="Upload a file") - upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False) - upload_file.change( - self.upload_file, - [upload_file, file_uploads_log], - [upload_status, file_uploads_log], - ) - text_input = gr.Textbox(lines=1, label="Chat Message") + + # Set up event handlers text_input.submit( self.log_user_message, [text_input, file_uploads_log], - [stored_messages, text_input], - ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot]) + [stored_messages, text_input, submit_btn], + ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then( + lambda: ( + gr.Textbox( + interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button" + ), + gr.Button(interactive=True), + ), + None, + [text_input, submit_btn], + ) + + submit_btn.click( + self.log_user_message, + [text_input, file_uploads_log], + [stored_messages, text_input, submit_btn], + ).then(self.interact_with_agent, [stored_messages, chatbot, session_state], [chatbot]).then( + lambda: ( + gr.Textbox( + interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button" + ), + gr.Button(interactive=True), + ), + None, + [text_input, submit_btn], + ) - demo.launch(debug=True, share=share, **kwargs) + return demo __all__ = ["stream_to_gradio", "GradioUI"] diff --git a/src/smolagents/local_python_executor.py b/src/smolagents/local_python_executor.py index a48e1e11e..0bfa53b7f 100644 --- a/src/smolagents/local_python_executor.py +++ b/src/smolagents/local_python_executor.py @@ -21,14 +21,13 @@ import logging import math import re -from 
collections.abc import Mapping +from collections.abc import Callable, Mapping +from functools import wraps from importlib import import_module -from types import ModuleType -from typing import Any, Callable, Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd +from types import BuiltinFunctionType, FunctionType, ModuleType +from typing import Any +from .tools import Tool from .utils import BASE_BUILTIN_MODULES, truncate_content @@ -59,6 +58,12 @@ def custom_print(*args): return None +def nodunder_getattr(obj, name, default=None): + if name.startswith("__") and name.endswith("__"): + raise InterpreterError(f"Forbidden access to dunder attribute: {name}") + return getattr(obj, name, default) + + BASE_PYTHON_TOOLS = { "print": custom_print, "isinstance": isinstance, @@ -106,7 +111,7 @@ def custom_print(*args): "iter": iter, "divmod": divmod, "callable": callable, - "getattr": getattr, + "getattr": nodunder_getattr, "hasattr": hasattr, "setattr": setattr, "issubclass": issubclass, @@ -114,25 +119,31 @@ def custom_print(*args): "complex": complex, } -DANGEROUS_PATTERNS = ( - "_os", +# Non-exhaustive list of dangerous modules that should not be imported +DANGEROUS_MODULES = [ + "builtins", + "io", + "multiprocessing", "os", - "subprocess", - "_subprocess", + "pathlib", "pty", - "system", - "popen", - "spawn", "shutil", - "sys", - "pathlib", - "io", "socket", - "compile", - "eval", - "exec", - "multiprocessing", -) + "subprocess", + "sys", +] + +DANGEROUS_FUNCTIONS = [ + "builtins.compile", + "builtins.eval", + "builtins.exec", + "builtins.globals", + "builtins.locals", + "builtins.__import__", + "os.popen", + "os.system", + "posix.system", +] class PrintContainer: @@ -211,12 +222,88 @@ def fix_final_answer_code(code: str) -> str: return code +def build_import_tree(authorized_imports: list[str]) -> dict[str, Any]: + tree = {} + for import_path in authorized_imports: + parts = import_path.split(".") + current = tree + for part in parts: + if part not in 
current: + current[part] = {} + current = current[part] + return tree + + +def check_import_authorized(import_to_check: str, authorized_imports: list[str]) -> bool: + current_node = build_import_tree(authorized_imports) + for part in import_to_check.split("."): + if "*" in current_node: + return True + if part not in current_node: + return False + current_node = current_node[part] + return True + + +def safer_eval(func: Callable): + """ + Decorator to make the evaluation of a function safer by checking its return value. + + Args: + func: Function to make safer. + + Returns: + Callable: Safer function with return value check. + """ + + @wraps(func) + def _check_return( + expression, + state, + static_tools, + custom_tools, + authorized_imports=BASE_BUILTIN_MODULES, + ): + result = func(expression, state, static_tools, custom_tools, authorized_imports=authorized_imports) + if isinstance(result, ModuleType): + if not check_import_authorized(result.__name__, authorized_imports): + raise InterpreterError(f"Forbidden access to module: {result.__name__}") + elif isinstance(result, dict) and result.get("__spec__"): + if not check_import_authorized(result["__name__"], authorized_imports): + raise InterpreterError(f"Forbidden access to module: {result['__name__']}") + elif isinstance(result, (FunctionType, BuiltinFunctionType)): + for qualified_function_name in DANGEROUS_FUNCTIONS: + module_name, function_name = qualified_function_name.rsplit(".", 1) + if ( + function_name not in static_tools + and result.__name__ == function_name + and result.__module__ == module_name + ): + raise InterpreterError(f"Forbidden access to function: {function_name}") + return result + + return _check_return + + +def evaluate_attribute( + expression: ast.Attribute, + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> Any: + if expression.attr.startswith("__") and expression.attr.endswith("__"): + raise 
InterpreterError(f"Forbidden access to dunder attribute: {expression.attr}") + value = evaluate_ast(expression.value, state, static_tools, custom_tools, authorized_imports) + return getattr(value, expression.attr) + + def evaluate_unaryop( expression: ast.UnaryOp, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: operand = evaluate_ast(expression.operand, state, static_tools, custom_tools, authorized_imports) if isinstance(expression.op, ast.USub): @@ -233,10 +320,10 @@ def evaluate_unaryop( def evaluate_lambda( lambda_expression: ast.Lambda, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Callable: args = [arg.arg for arg in lambda_expression.args.args] @@ -257,10 +344,10 @@ def lambda_func(*values: Any) -> Any: def evaluate_while( while_loop: ast.While, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: iterations = 0 while evaluate_ast(while_loop.test, state, static_tools, custom_tools, authorized_imports): @@ -279,11 +366,13 @@ def evaluate_while( def create_function( func_def: ast.FunctionDef, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Callable: + source_code = ast.unparse(func_def) 
+ def new_func(*args: Any, **kwargs: Any) -> Any: func_state = state.copy() arg_names = [arg.arg for arg in func_def.args.args] @@ -334,15 +423,20 @@ def new_func(*args: Any, **kwargs: Any) -> Any: return result + # Store original AST, source code, and name + new_func.__ast__ = func_def + new_func.__source__ = source_code + new_func.__name__ = func_def.name + return new_func def evaluate_function_def( func_def: ast.FunctionDef, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Callable: custom_tools[func_def.name] = create_function(func_def, state, static_tools, custom_tools, authorized_imports) return custom_tools[func_def.name] @@ -350,10 +444,10 @@ def evaluate_function_def( def evaluate_class_def( class_def: ast.ClassDef, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> type: class_name = class_def.name bases = [evaluate_ast(base, state, static_tools, custom_tools, authorized_imports) for base in class_def.bases] @@ -361,7 +455,7 @@ def evaluate_class_def( for stmt in class_def.body: if isinstance(stmt, ast.FunctionDef): - class_dict[stmt.name] = evaluate_function_def(stmt, state, static_tools, custom_tools, authorized_imports) + class_dict[stmt.name] = evaluate_ast(stmt, state, static_tools, custom_tools, authorized_imports) elif isinstance(stmt, ast.Assign): for target in stmt.targets: if isinstance(target, ast.Name): @@ -380,6 +474,14 @@ def evaluate_class_def( custom_tools, authorized_imports, ) + elif ( + isinstance(stmt, ast.Expr) + and stmt == class_def.body[0] + and isinstance(stmt.value, ast.Constant) + and 
isinstance(stmt.value.value, str) + ): + # Check if it is a docstring: first statement in class body which is a string literal expression + class_dict["__doc__"] = stmt.value.value else: raise InterpreterError(f"Unsupported statement in class body: {stmt.__class__.__name__}") @@ -388,12 +490,29 @@ def evaluate_class_def( return new_class +def evaluate_annassign( + annassign: ast.AnnAssign, + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> Any: + # If there's a value to assign, evaluate it + if annassign.value: + value = evaluate_ast(annassign.value, state, static_tools, custom_tools, authorized_imports) + # Set the value for the target + set_value(annassign.target, value, state, static_tools, custom_tools, authorized_imports) + return value + # For declarations without values (x: int), just return None + return None + + def evaluate_augassign( expression: ast.AugAssign, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: def get_current_value(target: ast.AST) -> Any: if isinstance(target, ast.Name): @@ -462,29 +581,30 @@ def get_current_value(target: ast.AST) -> Any: def evaluate_boolop( node: ast.BoolOp, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], -) -> bool: - if isinstance(node.op, ast.And): - for value in node.values: - if not evaluate_ast(value, state, static_tools, custom_tools, authorized_imports): - return False - return True - elif isinstance(node.op, ast.Or): - for value in node.values: - if evaluate_ast(value, state, static_tools, custom_tools, authorized_imports): - return True - return False + state: dict[str, Any], + static_tools: dict[str, Callable], + 
custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> Any: + # Determine which value should trigger short-circuit based on operation type: + # - 'and' returns the first falsy value encountered (or the last value if all are truthy) + # - 'or' returns the first truthy value encountered (or the last value if all are falsy) + is_short_circuit_value = (lambda x: not x) if isinstance(node.op, ast.And) else (lambda x: bool(x)) + for value in node.values: + result = evaluate_ast(value, state, static_tools, custom_tools, authorized_imports) + # Short-circuit: return immediately if the condition is met + if is_short_circuit_value(result): + return result + # If no short-circuit occurred, return the last evaluated value + return result def evaluate_binop( binop: ast.BinOp, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: # Recursively evaluate the left and right operands left_val = evaluate_ast(binop.left, state, static_tools, custom_tools, authorized_imports) @@ -521,24 +641,23 @@ def evaluate_binop( def evaluate_assign( assign: ast.Assign, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: result = evaluate_ast(assign.value, state, static_tools, custom_tools, authorized_imports) if len(assign.targets) == 1: target = assign.targets[0] set_value(target, result, state, static_tools, custom_tools, authorized_imports) else: - if len(assign.targets) != len(result): - raise InterpreterError(f"Assign failed: expected {len(result)} values but got {len(assign.targets)}.") expanded_values = [] for tgt in assign.targets: if 
isinstance(tgt, ast.Starred): expanded_values.extend(result) else: expanded_values.append(result) + for tgt, val in zip(assign.targets, expanded_values): set_value(tgt, val, state, static_tools, custom_tools, authorized_imports) return result @@ -547,10 +666,10 @@ def evaluate_assign( def set_value( target: ast.AST, value: Any, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: if isinstance(target, ast.Name): if target.id in static_tools: @@ -577,22 +696,26 @@ def set_value( def evaluate_call( call: ast.Call, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: - if not ( - isinstance(call.func, ast.Attribute) or isinstance(call.func, ast.Name) or isinstance(call.func, ast.Subscript) - ): + if not isinstance(call.func, (ast.Call, ast.Lambda, ast.Attribute, ast.Name, ast.Subscript)): raise InterpreterError(f"This is not a correct function: {call.func}).") - if isinstance(call.func, ast.Attribute): + + func, func_name = None, None + + if isinstance(call.func, ast.Call): + func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports) + elif isinstance(call.func, ast.Lambda): + func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports) + elif isinstance(call.func, ast.Attribute): obj = evaluate_ast(call.func.value, state, static_tools, custom_tools, authorized_imports) func_name = call.func.attr if not hasattr(obj, func_name): raise InterpreterError(f"Object {obj} has no attribute {func_name}") func = getattr(obj, func_name) - elif isinstance(call.func, ast.Name): func_name = 
call.func.id if func_name in state: @@ -605,20 +728,14 @@ def evaluate_call( func = ERRORS[func_name] else: raise InterpreterError( - f"It is not permitted to evaluate other functions than the provided tools or functions defined/imported in previous code (tried to execute {call.func.id})." + f"Forbidden function evaluation: '{call.func.id}' is not among the explicitly allowed tools or defined/imported in the preceding code" ) - elif isinstance(call.func, ast.Subscript): - value = evaluate_ast(call.func.value, state, static_tools, custom_tools, authorized_imports) - index = evaluate_ast(call.func.slice, state, static_tools, custom_tools, authorized_imports) - if isinstance(value, (list, tuple)): - func = value[index] - else: - raise InterpreterError(f"Cannot subscript object of type {type(value).__name__}") - + func = evaluate_ast(call.func, state, static_tools, custom_tools, authorized_imports) if not callable(func): raise InterpreterError(f"This is not a correct function: {call.func}).") func_name = None + args = [] for arg in call.args: if isinstance(arg, ast.Starred): @@ -647,71 +764,43 @@ def evaluate_call( return super(cls, instance) else: raise InterpreterError("super() takes at most 2 arguments") - else: - if func_name == "print": - state["_print_outputs"] += " ".join(map(str, args)) + "\n" - return None - else: # Assume it's a callable object - if ( - (inspect.getmodule(func) == builtins) - and inspect.isbuiltin(func) - and (func not in static_tools.values()) - ): - raise InterpreterError( - f"Invoking a builtin function that has not been explicitly added as a tool is not allowed ({func_name})." 
- ) - return func(*args, **kwargs) + elif func_name == "print": + state["_print_outputs"] += " ".join(map(str, args)) + "\n" + return None + else: # Assume it's a callable object + if (inspect.getmodule(func) == builtins) and inspect.isbuiltin(func) and (func not in static_tools.values()): + raise InterpreterError( + f"Invoking a builtin function that has not been explicitly added as a tool is not allowed ({func_name})." + ) + return func(*args, **kwargs) def evaluate_subscript( subscript: ast.Subscript, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: index = evaluate_ast(subscript.slice, state, static_tools, custom_tools, authorized_imports) value = evaluate_ast(subscript.value, state, static_tools, custom_tools, authorized_imports) - - if isinstance(value, str) and isinstance(index, str): - raise InterpreterError("You're trying to subscript a string with a string index, which is impossible") - if isinstance(value, pd.core.indexing._LocIndexer): - parent_object = value.obj - return parent_object.loc[index] - if isinstance(value, pd.core.indexing._iLocIndexer): - parent_object = value.obj - return parent_object.iloc[index] - if isinstance(value, (pd.DataFrame, pd.Series, np.ndarray)): - return value[index] - elif isinstance(value, pd.core.groupby.generic.DataFrameGroupBy): - return value[index] - elif isinstance(index, slice): - return value[index] - elif isinstance(value, (list, tuple)): - if not (-len(value) <= index < len(value)): - raise InterpreterError(f"Index {index} out of bounds for list of length {len(value)}") - return value[int(index)] - elif isinstance(value, str): - if not (-len(value) <= index < len(value)): - raise InterpreterError(f"Index {index} out of bounds for string of length {len(value)}") - return value[index] - elif 
index in value: + try: return value[index] - else: - error_message = f"Could not index {value} with '{index}'." + except (KeyError, IndexError, TypeError) as e: + error_message = f"Could not index {value} with '{index}': {type(e).__name__}: {e}" if isinstance(index, str) and isinstance(value, Mapping): close_matches = difflib.get_close_matches(index, list(value.keys())) if len(close_matches) > 0: - error_message += f" Maybe you meant one of these indexes instead: {str(close_matches)}" - raise InterpreterError(error_message) + error_message += f". Maybe you meant one of these indexes instead: {str(close_matches)}" + raise InterpreterError(error_message) from e def evaluate_name( name: ast.Name, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: if name.id in state: return state[name.id] @@ -729,10 +818,10 @@ def evaluate_name( def evaluate_condition( condition: ast.Compare, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> bool | object: result = True left = evaluate_ast(condition.left, state, static_tools, custom_tools, authorized_imports) @@ -771,10 +860,10 @@ def evaluate_condition( def evaluate_if( if_statement: ast.If, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: result = None test_result = evaluate_ast(if_statement.test, state, static_tools, custom_tools, authorized_imports) @@ -793,10 +882,10 @@ def 
evaluate_if( def evaluate_for( for_loop: ast.For, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> Any: result = None iterator = evaluate_ast(for_loop.iter, state, static_tools, custom_tools, authorized_imports) @@ -826,12 +915,12 @@ def evaluate_for( def evaluate_listcomp( listcomp: ast.ListComp, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], -) -> List[Any]: - def inner_evaluate(generators: List[ast.comprehension], index: int, current_state: Dict[str, Any]) -> List[Any]: + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> list[Any]: + def inner_evaluate(generators: list[ast.comprehension], index: int, current_state: dict[str, Any]) -> list[Any]: if index >= len(generators): return [ evaluate_ast( @@ -868,12 +957,47 @@ def inner_evaluate(generators: List[ast.comprehension], index: int, current_stat return inner_evaluate(listcomp.generators, 0, state) +def evaluate_setcomp( + setcomp: ast.SetComp, + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> set[Any]: + result = set() + for gen in setcomp.generators: + iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports) + for value in iter_value: + new_state = state.copy() + set_value( + gen.target, + value, + new_state, + static_tools, + custom_tools, + authorized_imports, + ) + if all( + evaluate_ast(if_clause, new_state, static_tools, custom_tools, authorized_imports) + for if_clause in gen.ifs + ): + element = evaluate_ast( + setcomp.elt, + new_state, + static_tools, + custom_tools, + authorized_imports, + ) + 
result.add(element) + return result + + def evaluate_try( try_node: ast.Try, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: try: for stmt in try_node.body: @@ -905,10 +1029,10 @@ def evaluate_try( def evaluate_raise( raise_node: ast.Raise, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: if raise_node.exc is not None: exc = evaluate_ast(raise_node.exc, state, static_tools, custom_tools, authorized_imports) @@ -929,10 +1053,10 @@ def evaluate_raise( def evaluate_assert( assert_node: ast.Assert, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: test_result = evaluate_ast(assert_node.test, state, static_tools, custom_tools, authorized_imports) if not test_result: @@ -947,10 +1071,10 @@ def evaluate_assert( def evaluate_with( with_node: ast.With, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: contexts = [] for item in with_node.items: @@ -995,18 +1119,9 @@ def get_safe_module(raw_module, authorized_imports, visited=None): # Copy all attributes by reference, recursively checking modules for attr_name in dir(raw_module): - # Skip dangerous patterns at any level - if any( - pattern 
in raw_module.__name__.split(".") + [attr_name] - and not check_module_authorized(pattern, authorized_imports) - for pattern in DANGEROUS_PATTERNS - ): - logger.info(f"Skipping dangerous attribute {raw_module.__name__}.{attr_name}") - continue - try: attr_value = getattr(raw_module, attr_name) - except ImportError as e: + except (ImportError, AttributeError) as e: # lazy / dynamic loading module -> INFO log and skip logger.info( f"Skipping import error while copying {raw_module.__name__}.{attr_name}: {type(e).__name__} - {e}" @@ -1021,22 +1136,10 @@ def get_safe_module(raw_module, authorized_imports, visited=None): return safe_module -def check_module_authorized(module_name, authorized_imports): - if "*" in authorized_imports: - return True - else: - module_path = module_name.split(".") - if any([module in DANGEROUS_PATTERNS and module not in authorized_imports for module in module_path]): - return False - # ["A", "B", "C"] -> ["A", "A.B", "A.B.C"] - module_subpaths = [".".join(module_path[:i]) for i in range(1, len(module_path) + 1)] - return any(subpath in authorized_imports for subpath in module_subpaths) - - -def import_modules(expression, state, authorized_imports): +def evaluate_import(expression, state, authorized_imports): if isinstance(expression, ast.Import): for alias in expression.names: - if check_module_authorized(alias.name, authorized_imports): + if check_import_authorized(alias.name, authorized_imports): raw_module = import_module(alias.name) state[alias.asname or alias.name] = get_safe_module(raw_module, authorized_imports) else: @@ -1045,7 +1148,7 @@ def import_modules(expression, state, authorized_imports): ) return None elif isinstance(expression, ast.ImportFrom): - if check_module_authorized(expression.module, authorized_imports): + if check_import_authorized(expression.module, authorized_imports): raw_module = __import__(expression.module, fromlist=[alias.name for alias in expression.names]) module = get_safe_module(raw_module, 
authorized_imports) if expression.names[0].name == "*": # Handle "from module import *" @@ -1071,11 +1174,11 @@ def import_modules(expression, state, authorized_imports): def evaluate_dictcomp( dictcomp: ast.DictComp, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], -) -> Dict[Any, Any]: + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], +) -> dict[Any, Any]: result = {} for gen in dictcomp.generators: iter_value = evaluate_ast(gen.iter, state, static_tools, custom_tools, authorized_imports) @@ -1113,10 +1216,10 @@ def evaluate_dictcomp( def evaluate_delete( delete_node: ast.Delete, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str], + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str], ) -> None: """ Evaluate a delete statement (del x, del x[y]). @@ -1147,12 +1250,13 @@ def evaluate_delete( raise InterpreterError(f"Deletion of {type(target).__name__} targets is not supported") +@safer_eval def evaluate_ast( expression: ast.AST, - state: Dict[str, Any], - static_tools: Dict[str, Callable], - custom_tools: Dict[str, Callable], - authorized_imports: List[str] = BASE_BUILTIN_MODULES, + state: dict[str, Any], + static_tools: dict[str, Callable], + custom_tools: dict[str, Callable], + authorized_imports: list[str] = BASE_BUILTIN_MODULES, ): """ Evaluate an abstract syntax tree using the content of the variables stored in a state and only evaluating a given @@ -1169,21 +1273,23 @@ def evaluate_ast( static_tools (`Dict[str, Callable]`): Functions that may be called during the evaluation. Trying to change one of these static_tools will raise an error. custom_tools (`Dict[str, Callable]`): - Functions that may be called during the evaluation. 
These static_tools can be overwritten. + Functions that may be called during the evaluation. These custom_tools can be overwritten. authorized_imports (`List[str]`): The list of modules that can be imported by the code. By default, only a few safe modules are allowed. If it contains "*", it will authorize any import. Use this at your own risk! """ - if state.setdefault("_operations_count", 0) >= MAX_OPERATIONS: + if state.setdefault("_operations_count", {"counter": 0})["counter"] >= MAX_OPERATIONS: raise InterpreterError( f"Reached the max number of operations of {MAX_OPERATIONS}. Maybe there is an infinite loop somewhere in the code, or you're just asking too many calculations." ) - state["_operations_count"] += 1 + state["_operations_count"]["counter"] += 1 common_params = (state, static_tools, custom_tools, authorized_imports) if isinstance(expression, ast.Assign): # Assignment -> we evaluate the assignment which should update the state # We return the variable assigned as it may be used to determine the final result. 
return evaluate_assign(expression, *common_params) + elif isinstance(expression, ast.AnnAssign): + return evaluate_annassign(expression, *common_params) elif isinstance(expression, ast.AugAssign): return evaluate_augassign(expression, *common_params) elif isinstance(expression, ast.Call): @@ -1196,6 +1302,10 @@ def evaluate_ast( return tuple((evaluate_ast(elt, *common_params) for elt in expression.elts)) elif isinstance(expression, (ast.ListComp, ast.GeneratorExp)): return evaluate_listcomp(expression, *common_params) + elif isinstance(expression, ast.DictComp): + return evaluate_dictcomp(expression, *common_params) + elif isinstance(expression, ast.SetComp): + return evaluate_setcomp(expression, *common_params) elif isinstance(expression, ast.UnaryOp): return evaluate_unaryop(expression, *common_params) elif isinstance(expression, ast.Starred): @@ -1260,20 +1370,17 @@ def evaluate_ast( else: return evaluate_ast(expression.orelse, *common_params) elif isinstance(expression, ast.Attribute): - value = evaluate_ast(expression.value, *common_params) - return getattr(value, expression.attr) + return evaluate_attribute(expression, *common_params) elif isinstance(expression, ast.Slice): return slice( evaluate_ast(expression.lower, *common_params) if expression.lower is not None else None, evaluate_ast(expression.upper, *common_params) if expression.upper is not None else None, evaluate_ast(expression.step, *common_params) if expression.step is not None else None, ) - elif isinstance(expression, ast.DictComp): - return evaluate_dictcomp(expression, *common_params) elif isinstance(expression, ast.While): return evaluate_while(expression, *common_params) elif isinstance(expression, (ast.Import, ast.ImportFrom)): - return import_modules(expression, state, authorized_imports) + return evaluate_import(expression, state, authorized_imports) elif isinstance(expression, ast.ClassDef): return evaluate_class_def(expression, *common_params) elif isinstance(expression, ast.Try): @@ 
-1304,10 +1411,10 @@ def __init__(self, value): def evaluate_python_code( code: str, - static_tools: Optional[Dict[str, Callable]] = None, - custom_tools: Optional[Dict[str, Callable]] = None, - state: Optional[Dict[str, Any]] = None, - authorized_imports: List[str] = BASE_BUILTIN_MODULES, + static_tools: dict[str, Callable] | None = None, + custom_tools: dict[str, Callable] | None = None, + state: dict[str, Any] | None = None, + authorized_imports: list[str] = BASE_BUILTIN_MODULES, max_print_outputs_length: int = DEFAULT_MAX_LEN_OUTPUT, ): """ @@ -1346,11 +1453,15 @@ def evaluate_python_code( custom_tools = custom_tools if custom_tools is not None else {} result = None state["_print_outputs"] = PrintContainer() + state["_operations_count"] = {"counter": 0} - def final_answer(value): - raise FinalAnswerException(value) + if "final_answer" in static_tools: + previous_final_answer = static_tools["final_answer"] - static_tools["final_answer"] = final_answer + def final_answer(answer): # Using 'answer' as the argument like in the original function + raise FinalAnswerException(previous_final_answer(answer)) + + static_tools["final_answer"] = final_answer try: for node in expression.body: @@ -1375,29 +1486,27 @@ def final_answer(value): ) -class LocalPythonInterpreter: +class PythonExecutor: + pass + + +class LocalPythonExecutor(PythonExecutor): def __init__( self, - additional_authorized_imports: List[str], - tools: Dict, - max_print_outputs_length: Optional[int] = None, + additional_authorized_imports: list[str], + max_print_outputs_length: int | None = None, ): self.custom_tools = {} - self.state = {} + self.state = {"__name__": "__main__"} self.max_print_outputs_length = max_print_outputs_length if max_print_outputs_length is None: self.max_print_outputs_length = DEFAULT_MAX_LEN_OUTPUT self.additional_authorized_imports = additional_authorized_imports self.authorized_imports = list(set(BASE_BUILTIN_MODULES) | set(self.additional_authorized_imports)) - # Add base 
trusted tools to list - self.static_tools = { - **tools, - **BASE_PYTHON_TOOLS.copy(), - } # TODO: assert self.authorized imports are all installed locally + self.static_tools = None - def __call__(self, code_action: str, additional_variables: Dict) -> Tuple[Any, str, bool]: - self.state.update(additional_variables) + def __call__(self, code_action: str) -> tuple[Any, str, bool]: output, is_final_answer = evaluate_python_code( code_action, static_tools=self.static_tools, @@ -1409,5 +1518,11 @@ def __call__(self, code_action: str, additional_variables: Dict) -> Tuple[Any, s logs = str(self.state["_print_outputs"]) return output, logs, is_final_answer + def send_variables(self, variables: dict): + self.state.update(variables) + + def send_tools(self, tools: dict[str, Tool]): + self.static_tools = {**tools, **BASE_PYTHON_TOOLS.copy()} + -__all__ = ["evaluate_python_code", "LocalPythonInterpreter"] +__all__ = ["evaluate_python_code", "LocalPythonExecutor"] diff --git a/src/smolagents/mcp_client.py b/src/smolagents/mcp_client.py new file mode 100644 index 000000000..000e6e08b --- /dev/null +++ b/src/smolagents/mcp_client.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# coding=utf-8 + +# Copyright 2025 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from types import TracebackType +from typing import TYPE_CHECKING, Any + +from smolagents.tools import Tool + + +__all__ = ["MCPClient"] + +if TYPE_CHECKING: + from mcpadapt.core import StdioServerParameters + + +class MCPClient: + """Manages the connection to an MCP server and make its tools available to SmolAgents. + + Note: tools can only be accessed after the connection has been started with the + `connect()` method, done during the init. If you don't use the context manager + we strongly encourage to use "try ... finally" to ensure the connection is cleaned up. + + Args: + server_parameters (StdioServerParameters | dict[str, Any] | list[StdioServerParameters | dict[str, Any]]): + MCP server parameters (stdio or sse). Can be a list if you want to connect multiple MCPs at once. + + Example: + ```python + # fully managed context manager + stdio + with MCPClient(...) as tools: + # tools are now available + + # context manager + sse + with MCPClient({"url": "http://localhost:8000/sse"}) as tools: + # tools are now available + + # manually manage the connection via the mcp_client object: + try: + mcp_client = MCPClient(...) + tools = mcp_client.get_tools() + + # use your tools here. 
+ finally: + mcp_client.stop() + ``` + """ + + def __init__( + self, + server_parameters: "StdioServerParameters" | dict[str, Any] | list["StdioServerParameters" | dict[str, Any]], + ): + try: + from mcpadapt.core import MCPAdapt + from mcpadapt.smolagents_adapter import SmolAgentsAdapter + except ModuleNotFoundError: + raise ModuleNotFoundError("Please install 'mcp' extra to use MCPClient: `pip install 'smolagents[mcp]'`") + self._adapter = MCPAdapt(server_parameters, SmolAgentsAdapter()) + self._tools: list[Tool] | None = None + self.connect() + + def connect(self): + """Connect to the MCP server and initialize the tools.""" + self._tools: list[Tool] = self._adapter.__enter__() + + def disconnect( + self, + exc_type: type[BaseException] | None = None, + exc_value: BaseException | None = None, + exc_traceback: TracebackType | None = None, + ): + """Disconnect from the MCP server""" + self._adapter.__exit__(exc_type, exc_value, exc_traceback) + + def get_tools(self) -> list[Tool]: + """The SmolAgents tools available from the MCP server. + + Note: for now, this always returns the tools available at the creation of the session, + but it will in a future release return also new tools available from the MCP server if + any at call time. + + Raises: + ValueError: If the MCP server tools is None (usually assuming the server is not started). + + Returns: + list[Tool]: The SmolAgents tools available from the MCP server. + """ + if self._tools is None: + raise ValueError( + "Couldn't retrieve tools from MCP server, run `mcp_client.connect()` first before accessing `tools`" + ) + return self._tools + + def __enter__(self) -> list[Tool]: + """Connect to the MCP server and return the tools directly. + + Note that because of the `.connect` in the init, the mcp_client + is already connected at this point. 
+ """ + return self._tools + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + exc_traceback: TracebackType | None, + ): + """Disconnect from the MCP server.""" + self.disconnect(exc_type, exc_value, exc_traceback) diff --git a/src/smolagents/memory.py b/src/smolagents/memory.py index 5875db596..38fa9e1e9 100644 --- a/src/smolagents/memory.py +++ b/src/smolagents/memory.py @@ -1,6 +1,6 @@ from dataclasses import asdict, dataclass from logging import getLogger -from typing import TYPE_CHECKING, Any, Dict, List, TypedDict, Union +from typing import TYPE_CHECKING, Any, TypedDict from smolagents.models import ChatMessage, MessageRole from smolagents.monitoring import AgentLogger, LogLevel @@ -8,6 +8,8 @@ if TYPE_CHECKING: + import PIL.Image + from smolagents.models import ChatMessage from smolagents.monitoring import AgentLogger @@ -17,7 +19,7 @@ class Message(TypedDict): role: MessageRole - content: str | list[dict] + content: str | list[dict[str, Any]] @dataclass @@ -42,23 +44,23 @@ class MemoryStep: def dict(self): return asdict(self) - def to_messages(self, **kwargs) -> List[Dict[str, Any]]: + def to_messages(self, summary_mode: bool = False) -> list[Message]: raise NotImplementedError @dataclass class ActionStep(MemoryStep): - model_input_messages: List[Message] | None = None - tool_calls: List[ToolCall] | None = None + model_input_messages: list[Message] | None = None + tool_calls: list[ToolCall] | None = None start_time: float | None = None end_time: float | None = None step_number: int | None = None error: AgentError | None = None duration: float | None = None - model_output_message: ChatMessage = None + model_output_message: ChatMessage | None = None model_output: str | None = None observations: str | None = None - observations_images: List[str] | None = None + observations_images: list["PIL.Image.Image"] | None = None action_output: Any = None def dict(self): @@ -77,10 +79,8 @@ def dict(self): 
"action_output": make_json_serializable(self.action_output), } - def to_messages(self, summary_mode: bool = False, show_model_input_messages: bool = False) -> List[Message]: + def to_messages(self, summary_mode: bool = False) -> list[Message]: messages = [] - if self.model_input_messages is not None and show_model_input_messages: - messages.append(Message(role=MessageRole.SYSTEM, content=self.model_input_messages)) if self.model_output is not None and not summary_mode: messages.append( Message(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.model_output.strip()}]) @@ -89,7 +89,7 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo if self.tool_calls is not None: messages.append( Message( - role=MessageRole.ASSISTANT, + role=MessageRole.TOOL_CALL, content=[ { "type": "text", @@ -99,6 +99,20 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo ) ) + if self.observations_images: + messages.append( + Message( + role=MessageRole.USER, + content=[ + { + "type": "image", + "image": image, + } + for image in self.observations_images + ], + ) + ) + if self.observations is not None: messages.append( Message( @@ -106,7 +120,7 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo content=[ { "type": "text", - "text": f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}", + "text": f"Observation:\n{self.observations}", } ], ) @@ -123,54 +137,31 @@ def to_messages(self, summary_mode: bool = False, show_model_input_messages: boo Message(role=MessageRole.TOOL_RESPONSE, content=[{"type": "text", "text": message_content}]) ) - if self.observations_images: - messages.append( - Message( - role=MessageRole.USER, - content=[{"type": "text", "text": "Here are the observed images:"}] - + [ - { - "type": "image", - "image": image, - } - for image in self.observations_images - ], - ) - ) return messages @dataclass class PlanningStep(MemoryStep): - 
model_input_messages: List[Message] - model_output_message_facts: ChatMessage - facts: str - model_output_message_plan: ChatMessage + model_input_messages: list[Message] + model_output_message: ChatMessage plan: str - def to_messages(self, summary_mode: bool, **kwargs) -> List[Message]: - messages = [] - messages.append( - Message( - role=MessageRole.ASSISTANT, content=[{"type": "text", "text": f"[FACTS LIST]:\n{self.facts.strip()}"}] - ) - ) - - if not summary_mode: # This step is not shown to a model writing a plan to avoid influencing the new plan - messages.append( - Message( - role=MessageRole.ASSISTANT, content=[{"type": "text", "text": f"[PLAN]:\n{self.plan.strip()}"}] - ) - ) - return messages + def to_messages(self, summary_mode: bool = False) -> list[Message]: + if summary_mode: + return [] + return [ + Message(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": self.plan.strip()}]), + Message(role=MessageRole.USER, content=[{"type": "text", "text": "Now proceed and carry out this plan."}]), + # This second message creates a role change to prevent models models from simply continuing the plan message + ] @dataclass class TaskStep(MemoryStep): task: str - task_images: List[str] | None = None + task_images: list["PIL.Image.Image"] | None = None - def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]: + def to_messages(self, summary_mode: bool = False) -> list[Message]: content = [{"type": "text", "text": f"New task:\n{self.task}"}] if self.task_images: for image in self.task_images: @@ -183,16 +174,21 @@ def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]: class SystemPromptStep(MemoryStep): system_prompt: str - def to_messages(self, summary_mode: bool = False, **kwargs) -> List[Message]: + def to_messages(self, summary_mode: bool = False) -> list[Message]: if summary_mode: return [] return [Message(role=MessageRole.SYSTEM, content=[{"type": "text", "text": self.system_prompt}])] +@dataclass +class 
FinalAnswerStep(MemoryStep): + final_answer: Any + + class AgentMemory: def __init__(self, system_prompt: str): self.system_prompt = SystemPromptStep(system_prompt=system_prompt) - self.steps: List[Union[TaskStep, ActionStep, PlanningStep]] = [] + self.steps: list[TaskStep | ActionStep | PlanningStep] = [] def reset(self): self.steps = [] @@ -221,14 +217,15 @@ def replay(self, logger: AgentLogger, detailed: bool = False): logger.log_task(step.task, "", level=LogLevel.ERROR) elif isinstance(step, ActionStep): logger.log_rule(f"Step {step.step_number}", level=LogLevel.ERROR) - if detailed: - logger.log_messages(step.model_input_messages) - logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR) + if detailed and step.model_input_messages is not None: + logger.log_messages(step.model_input_messages, level=LogLevel.ERROR) + if step.model_output is not None: + logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR) elif isinstance(step, PlanningStep): logger.log_rule("Planning step", level=LogLevel.ERROR) - if detailed: + if detailed and step.model_input_messages is not None: logger.log_messages(step.model_input_messages, level=LogLevel.ERROR) - logger.log_markdown(title="Agent output:", content=step.facts + "\n" + step.plan, level=LogLevel.ERROR) + logger.log_markdown(title="Agent output:", content=step.plan, level=LogLevel.ERROR) __all__ = ["AgentMemory"] diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 2a586edfe..433f8fbe5 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# coding=utf-8 - # Copyright 2024 The HuggingFace Inc. team. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,24 +14,29 @@ import json import logging import os -import random +import re import uuid +import warnings +from collections.abc import Generator from copy import deepcopy from dataclasses import asdict, dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union - -from huggingface_hub import InferenceClient -from huggingface_hub.utils import is_torch_available -from PIL import Image +from threading import Thread +from typing import TYPE_CHECKING, Any from .tools import Tool -from .utils import _is_package_available, encode_image_base64, make_image_url +from .utils import _is_package_available, encode_image_base64, make_image_url, parse_json_blob if TYPE_CHECKING: + from huggingface_hub import ( + ChatCompletionOutputFunctionDefinition, + ChatCompletionOutputMessage, + ChatCompletionOutputToolCall, + ) from transformers import StoppingCriteriaList + logger = logging.getLogger(__name__) DEFAULT_JSONAGENT_REGEX_GRAMMAR = { @@ -61,15 +63,18 @@ def convert(obj): class ChatMessageToolCallDefinition: arguments: Any name: str - description: Optional[str] = None + description: str | None = None @classmethod - def from_hf_api(cls, tool_call_definition) -> "ChatMessageToolCallDefinition": - return cls( - arguments=tool_call_definition.arguments, - name=tool_call_definition.name, - description=tool_call_definition.description, + def from_hf_api( + cls, tool_call_definition: "ChatCompletionOutputFunctionDefinition" + ) -> "ChatMessageToolCallDefinition": + warnings.warn( + "ChatMessageToolCallDefinition.from_hf_api is deprecated and will be removed in version 1.16.0. 
" + "Please use ChatMessageToolCallDefinition with asdict() instead.", + FutureWarning, ) + return cls(**asdict(tool_call_definition)) @dataclass @@ -79,33 +84,27 @@ class ChatMessageToolCall: type: str @classmethod - def from_hf_api(cls, tool_call) -> "ChatMessageToolCall": - return cls( - function=ChatMessageToolCallDefinition.from_hf_api(tool_call.function), - id=tool_call.id, - type=tool_call.type, + def from_hf_api(cls, tool_call: "ChatCompletionOutputToolCall") -> "ChatMessageToolCall": + warnings.warn( + "ChatMessageToolCall.from_hf_api is deprecated and will be removed in version 1.16.0. " + "Please use ChatMessageToolCall with asdict() instead.", + FutureWarning, ) + return cls(**asdict(tool_call)) @dataclass class ChatMessage: role: str - content: Optional[str] = None - tool_calls: Optional[List[ChatMessageToolCall]] = None - raw: Optional[Any] = None # Stores the raw output from the API + content: str | None = None + tool_calls: list[ChatMessageToolCall] | None = None + raw: Any | None = None # Stores the raw output from the API def model_dump_json(self): return json.dumps(get_dict_from_nested_dataclasses(self, ignore_key="raw")) @classmethod - def from_hf_api(cls, message, raw) -> "ChatMessage": - tool_calls = None - if getattr(message, "tool_calls", None) is not None: - tool_calls = [ChatMessageToolCall.from_hf_api(tool_call) for tool_call in message.tool_calls] - return cls(role=message.role, content=message.content, tool_calls=tool_calls, raw=raw) - - @classmethod - def from_dict(cls, data: dict) -> "ChatMessage": + def from_dict(cls, data: dict, raw: Any | None = None) -> "ChatMessage": if data.get("tool_calls"): tool_calls = [ ChatMessageToolCall( @@ -114,13 +113,22 @@ def from_dict(cls, data: dict) -> "ChatMessage": for tc in data["tool_calls"] ] data["tool_calls"] = tool_calls - return cls(**data) + return cls(role=data["role"], content=data.get("content"), tool_calls=data.get("tool_calls"), raw=raw) def dict(self): return 
json.dumps(get_dict_from_nested_dataclasses(self)) + @classmethod + def from_hf_api(cls, message: "ChatCompletionOutputMessage", raw) -> "ChatMessage": + warnings.warn( + "ChatMessage.from_hf_api is deprecated and will be removed in version 1.16.0. " + "Please use ChatMessage.from_dict with asdict() instead.", + FutureWarning, + ) + return cls.from_dict(asdict(message), raw=raw) + -def parse_json_if_needed(arguments: Union[str, dict]) -> Union[str, dict]: +def parse_json_if_needed(arguments: str | dict) -> str | dict: if isinstance(arguments, dict): return arguments else: @@ -130,11 +138,10 @@ def parse_json_if_needed(arguments: Union[str, dict]) -> Union[str, dict]: return arguments -def parse_tool_args_if_needed(message: ChatMessage) -> ChatMessage: - if message.tool_calls is not None: - for tool_call in message.tool_calls: - tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) - return message +@dataclass +class CompletionDelta: + content: str | None = None + tool_calls: list[ChatMessageToolCall] | None = None class MessageRole(str, Enum): @@ -155,7 +162,7 @@ def roles(cls): } -def get_tool_json_schema(tool: Tool) -> Dict: +def get_tool_json_schema(tool: Tool) -> dict: properties = deepcopy(tool.inputs) required = [] for key, value in properties.items(): @@ -177,7 +184,7 @@ def get_tool_json_schema(tool: Tool) -> Dict: } -def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str: +def remove_stop_sequences(content: str, stop_sequences: list[str]) -> str: for stop_seq in stop_sequences: if content[-len(stop_seq) :] == stop_seq: content = content[: -len(stop_seq)] @@ -185,11 +192,11 @@ def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str: def get_clean_message_list( - message_list: List[Dict[str, str]], - role_conversions: Dict[MessageRole, MessageRole] = {}, + message_list: list[dict[str, str | list[dict]]], + role_conversions: dict[MessageRole, MessageRole] | dict[str, str] = {}, 
convert_images_to_image_urls: bool = False, flatten_messages_as_text: bool = False, -) -> List[Dict[str, str]]: +) -> list[dict[str, str | list[dict]]]: """ Subsequent messages with the same role will be concatenated to a single message. output_message_list is a list of messages that will be used to generate the final message that is chat template compatible with transformers LLM chat template. @@ -200,7 +207,7 @@ def get_clean_message_list( convert_images_to_image_urls (`bool`, default `False`): Whether to convert images to image URLs. flatten_messages_as_text (`bool`, default `False`): Whether to flatten messages as text. """ - output_message_list = [] + output_message_list: list[dict[str, str | list[dict]]] = [] message_list = deepcopy(message_list) # Avoid modifying the original list for message in message_list: role = message["role"] @@ -208,10 +215,11 @@ def get_clean_message_list( raise ValueError(f"Incorrect role {role}, only {MessageRole.roles()} are supported for now.") if role in role_conversions: - message["role"] = role_conversions[role] + message["role"] = role_conversions[role] # type: ignore # encode images if needed if isinstance(message["content"], list): for element in message["content"]: + assert isinstance(element, dict), "Error: this element should be a dict:" + str(element) if element["type"] == "image": assert not flatten_messages_as_text, f"Cannot use images with {flatten_messages_as_text=}" if convert_images_to_image_urls: @@ -227,9 +235,14 @@ def get_clean_message_list( if len(output_message_list) > 0 and message["role"] == output_message_list[-1]["role"]: assert isinstance(message["content"], list), "Error: wrong content:" + str(message["content"]) if flatten_messages_as_text: - output_message_list[-1]["content"] += message["content"][0]["text"] + output_message_list[-1]["content"] += "\n" + message["content"][0]["text"] else: - output_message_list[-1]["content"] += message["content"] + for el in message["content"]: + if el["type"] == 
"text" and output_message_list[-1]["content"][-1]["type"] == "text": + # Merge consecutive text messages rather than creating new ones + output_message_list[-1]["content"][-1]["text"] += "\n" + el["text"] + else: + output_message_list[-1]["content"].append(el) else: if flatten_messages_as_text: content = message["content"][0]["text"] @@ -239,23 +252,69 @@ def get_clean_message_list( return output_message_list +def get_tool_call_from_text(text: str, tool_name_key: str, tool_arguments_key: str) -> ChatMessageToolCall: + tool_call_dictionary, _ = parse_json_blob(text) + try: + tool_name = tool_call_dictionary[tool_name_key] + except Exception as e: + raise ValueError( + f"Key {tool_name_key=} not found in the generated tool call. Got keys: {list(tool_call_dictionary.keys())} instead" + ) from e + tool_arguments = tool_call_dictionary.get(tool_arguments_key, None) + if isinstance(tool_arguments, str): + tool_arguments = parse_json_if_needed(tool_arguments) + return ChatMessageToolCall( + id=str(uuid.uuid4()), + type="function", + function=ChatMessageToolCallDefinition(name=tool_name, arguments=tool_arguments), + ) + + +def supports_stop_parameter(model_id: str) -> bool: + """ + Check if the model supports the `stop` parameter. + + Not supported with reasoning models openai/o3 and openai/o4-mini (and their versioned variants). + + Args: + model_id (`str`): Model identifier (e.g. 
"openai/o3", "o4-mini-2025-04-16") + + Returns: + bool: True if the model supports the stop parameter, False otherwise + """ + model_name = model_id.split("/")[-1] + # o3 and o4-mini (including versioned variants, o3-2025-04-16) don't support stop parameter + pattern = r"^(o3[-\d]*|o4-mini[-\d]*)$" + return not re.match(pattern, model_name) + + class Model: - def __init__(self, **kwargs): - self.last_input_token_count = None - self.last_output_token_count = None + def __init__( + self, + flatten_messages_as_text: bool = False, + tool_name_key: str = "name", + tool_arguments_key: str = "arguments", + model_id: str | None = None, + **kwargs, + ): + self.flatten_messages_as_text = flatten_messages_as_text + self.tool_name_key = tool_name_key + self.tool_arguments_key = tool_arguments_key self.kwargs = kwargs + self.last_input_token_count: int | None = None + self.last_output_token_count: int | None = None + self.model_id: str | None = model_id def _prepare_completion_kwargs( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, - custom_role_conversions: Optional[Dict[str, str]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + custom_role_conversions: dict[str, str] | None = None, convert_images_to_image_urls: bool = False, - flatten_messages_as_text: bool = False, **kwargs, - ) -> Dict: + ) -> dict[str, Any]: """ Prepare parameters required for model invocation, handling parameter priorities. @@ -265,13 +324,13 @@ def _prepare_completion_kwargs( 3. 
Default values in self.kwargs """ # Clean and standardize the message list + flatten_messages_as_text = kwargs.pop("flatten_messages_as_text", self.flatten_messages_as_text) messages = get_clean_message_list( messages, role_conversions=custom_role_conversions or tool_role_conversions, convert_images_to_image_urls=convert_images_to_image_urls, flatten_messages_as_text=flatten_messages_as_text, ) - # Use self.kwargs as the base configuration completion_kwargs = { **self.kwargs, @@ -280,7 +339,9 @@ def _prepare_completion_kwargs( # Handle specific parameters if stop_sequences is not None: - completion_kwargs["stop"] = stop_sequences + # Some models do not support stop parameter + if supports_stop_parameter(self.model_id or ""): + completion_kwargs["stop"] = stop_sequences if grammar is not None: completion_kwargs["grammar"] = grammar @@ -298,24 +359,26 @@ def _prepare_completion_kwargs( return completion_kwargs - def get_token_counts(self) -> Dict[str, int]: + def get_token_counts(self) -> dict[str, int]: + if self.last_input_token_count is None or self.last_output_token_count is None: + raise ValueError("Token counts are not available") return { "input_token_count": self.last_input_token_count, "output_token_count": self.last_output_token_count, } - def __call__( + def generate( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, **kwargs, ) -> ChatMessage: """Process the input messages and return the model's response. Parameters: - messages (`List[Dict[str, str]]`): + messages (`list[dict[str, str]]`): A list of message dictionaries to be processed. Each dictionary should have the structure `{"role": "user/system", "content": "message content"}`. 
stop_sequences (`List[str]`, *optional*): A list of strings that will stop the generation if encountered in the model's output. @@ -329,9 +392,25 @@ def __call__( Returns: `ChatMessage`: A chat message object containing the model's response. """ - pass # To be implemented in child classes! + raise NotImplementedError("This method must be implemented in child classes") + + def __call__(self, *args, **kwargs): + return self.generate(*args, **kwargs) + + def parse_tool_calls(self, message: ChatMessage) -> ChatMessage: + """Sometimes APIs do not return the tool call as a specific object, so we need to parse it.""" + message.role = MessageRole.ASSISTANT # Overwrite role if needed + if not message.tool_calls: + assert message.content is not None, "Message contains no content and no tool calls" + message.tool_calls = [ + get_tool_call_from_text(message.content, self.tool_name_key, self.tool_arguments_key) + ] + assert len(message.tool_calls) > 0, "No tool call was found in the model output" + for tool_call in message.tool_calls: + tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) + return message - def to_dict(self) -> Dict: + def to_dict(self) -> dict: """ Converts the model into a JSON-compatible dictionary. """ @@ -366,7 +445,7 @@ def to_dict(self) -> Dict: return model_dictionary @classmethod - def from_dict(cls, model_dictionary: Dict[str, Any]) -> "Model": + def from_dict(cls, model_dictionary: dict[str, Any]) -> "Model": model_instance = cls( **{ k: v @@ -379,89 +458,109 @@ def from_dict(cls, model_dictionary: Dict[str, Any]) -> "Model": return model_instance -class HfApiModel(Model): - """A class to interact with Hugging Face's Inference API for language model interaction. - - This model allows you to communicate with Hugging Face's models using the Inference API. It can be used in both serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization. 
+class VLLMModel(Model): + """Model to use [vLLM](https://docs.vllm.ai/) for fast LLM inference and serving. Parameters: - model_id (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`): - The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub. - provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`. - defaults to hf-inference (HF Inference API). - token (`str`, *optional*): - Token used by the Hugging Face API for authentication. This token need to be authorized 'Make calls to the serverless Inference API'. - If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'. - If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration. - timeout (`int`, *optional*, defaults to 120): - Timeout for the API request, in seconds. - custom_role_conversions (`dict[str, str]`, *optional*): - Custom role conversion mapping to convert message roles in others. - Useful for specific models that do not support specific message roles like "system". - **kwargs: - Additional keyword arguments to pass to the Hugging Face API. - - Raises: - ValueError: - If the model name is not provided. - - Example: - ```python - >>> engine = HfApiModel( - ... model_id="Qwen/Qwen2.5-Coder-32B-Instruct", - ... token="your_hf_token_here", - ... max_tokens=5000, - ... ) - >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}] - >>> response = engine(messages, stop_sequences=["END"]) - >>> print(response) - "Quantum mechanics is the branch of physics that studies..." - ``` + model_id (`str`): + The Hugging Face model ID to be used for inference. + This can be a path or model identifier from the Hugging Face model hub. 
+ model_kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments to pass to the vLLM model (like revision, max_model_len, etc.). """ def __init__( self, - model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct", - provider: Optional[str] = None, - token: Optional[str] = None, - timeout: Optional[int] = 120, - custom_role_conversions: Optional[Dict[str, str]] = None, + model_id, + model_kwargs: dict[str, Any] | None = None, **kwargs, ): + if not _is_package_available("vllm"): + raise ModuleNotFoundError("Please install 'vllm' extra to use VLLMModel: `pip install 'smolagents[vllm]'`") + + from vllm import LLM # type: ignore + from vllm.transformers_utils.tokenizer import get_tokenizer # type: ignore + + self.model_kwargs = model_kwargs or {} super().__init__(**kwargs) self.model_id = model_id - self.provider = provider - if token is None: - token = os.getenv("HF_TOKEN") - self.client = InferenceClient(self.model_id, provider=provider, token=token, timeout=timeout) - self.custom_role_conversions = custom_role_conversions + self.model = LLM(model=model_id, **self.model_kwargs) + assert self.model is not None + self.tokenizer = get_tokenizer(model_id) + self._is_vlm = False # VLLMModel does not support vision models yet. 
+ + def cleanup(self): + import gc + + import torch + from vllm.distributed.parallel_state import ( # type: ignore + destroy_distributed_environment, + destroy_model_parallel, + ) + + destroy_model_parallel() + if self.model is not None: + # taken from https://github.com/vllm-project/vllm/issues/1908#issuecomment-2076870351 + del self.model.llm_engine.model_executor.driver_worker + gc.collect() + destroy_distributed_environment() + torch.cuda.empty_cache() - def __call__( + def generate( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, **kwargs, ) -> ChatMessage: + from vllm import SamplingParams # type: ignore + completion_kwargs = self._prepare_completion_kwargs( messages=messages, + flatten_messages_as_text=(not self._is_vlm), stop_sequences=stop_sequences, grammar=grammar, tools_to_call_from=tools_to_call_from, - convert_images_to_image_urls=True, - custom_role_conversions=self.custom_role_conversions, **kwargs, ) - response = self.client.chat_completion(**completion_kwargs) + messages = completion_kwargs.pop("messages") + prepared_stop_sequences = completion_kwargs.pop("stop", []) + tools = completion_kwargs.pop("tools", None) + completion_kwargs.pop("tool_choice", None) - self.last_input_token_count = response.usage.prompt_tokens - self.last_output_token_count = response.usage.completion_tokens - message = ChatMessage.from_hf_api(response.choices[0].message, raw=response) if tools_to_call_from is not None: - return parse_tool_args_if_needed(message) - return message + prompt = self.tokenizer.apply_chat_template( + messages, + tools=tools, + add_generation_prompt=True, + tokenize=False, + ) + else: + prompt = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + 
) + + sampling_params = SamplingParams( + n=kwargs.get("n", 1), + temperature=kwargs.get("temperature", 0.0), + max_tokens=kwargs.get("max_tokens", 2048), + stop=prepared_stop_sequences, + ) + + out = self.model.generate( + prompt, + sampling_params=sampling_params, + ) + output_text = out[0].outputs[0].text + self.last_input_token_count = len(out[0].prompt_token_ids) + self.last_output_token_count = len(out[0].outputs[0].token_ids) + return ChatMessage( + role=MessageRole.ASSISTANT, + content=output_text, + raw={"out": output_text, "completion_kwargs": completion_kwargs}, + ) class MLXModel(Model): @@ -510,50 +609,31 @@ def __init__( trust_remote_code: bool = False, **kwargs, ): - super().__init__(**kwargs) + super().__init__( + flatten_messages_as_text=True, model_id=model_id, **kwargs + ) # mlx-lm doesn't support vision models if not _is_package_available("mlx_lm"): raise ModuleNotFoundError( "Please install 'mlx-lm' extra to use 'MLXModel': `pip install 'smolagents[mlx-lm]'`" ) - import mlx_lm + import mlx_lm # type: ignore self.model_id = model_id self.model, self.tokenizer = mlx_lm.load(model_id, tokenizer_config={"trust_remote_code": trust_remote_code}) self.stream_generate = mlx_lm.stream_generate self.tool_name_key = tool_name_key self.tool_arguments_key = tool_arguments_key + self.is_vlm = False # mlx-lm doesn't support vision models - def _to_message(self, text, tools_to_call_from): - if tools_to_call_from: - # solution for extracting tool JSON without assuming a specific model output format - maybe_json = "{" + text.split("{", 1)[-1][::-1].split("}", 1)[-1][::-1] + "}" - parsed_text = json.loads(maybe_json) - tool_name = parsed_text.get(self.tool_name_key, None) - tool_arguments = parsed_text.get(self.tool_arguments_key, None) - if tool_name: - return ChatMessage( - role="assistant", - content="", - tool_calls=[ - ChatMessageToolCall( - id=uuid.uuid4(), - type="function", - function=ChatMessageToolCallDefinition(name=tool_name, 
arguments=tool_arguments), - ) - ], - ) - return ChatMessage(role="assistant", content=text) - - def __call__( + def generate( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, **kwargs, ) -> ChatMessage: completion_kwargs = self._prepare_completion_kwargs( - flatten_messages_as_text=True, # mlx-lm doesn't support vision models messages=messages, stop_sequences=stop_sequences, grammar=grammar, @@ -561,7 +641,7 @@ def __call__( **kwargs, ) messages = completion_kwargs.pop("messages") - prepared_stop_sequences = completion_kwargs.pop("stop", []) + stops = completion_kwargs.pop("stop", []) tools = completion_kwargs.pop("tools", None) completion_kwargs.pop("tool_choice", None) @@ -574,17 +654,16 @@ def __call__( self.last_input_token_count = len(prompt_ids) self.last_output_token_count = 0 text = "" - - for _ in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs): + for response in self.stream_generate(self.model, self.tokenizer, prompt=prompt_ids, **completion_kwargs): self.last_output_token_count += 1 - text += _.text - for stop_sequence in prepared_stop_sequences: - stop_sequence_start = text.rfind(stop_sequence) - if stop_sequence_start != -1: - text = text[:stop_sequence_start] - return self._to_message(text, tools_to_call_from) + text += response.text + if any((stop_index := text.rfind(stop)) != -1 for stop in stops): + text = text[:stop_index] + break - return self._to_message(text, tools_to_call_from) + return ChatMessage( + role=MessageRole.ASSISTANT, content=text, raw={"out": text, "completion_kwargs": completion_kwargs} + ) class TransformersModel(Model): @@ -596,8 +675,9 @@ class TransformersModel(Model): > You must have `transformers` 
and `torch` installed on your machine. Please run `pip install smolagents[transformers]` if it's not the case. Parameters: - model_id (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`): + model_id (`str`): The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub. + For example, `"Qwen/Qwen2.5-Coder-32B-Instruct"`. device_map (`str`, *optional*): The device_map to initialize your model with. torch_dtype (`str`, *optional*): @@ -628,67 +708,79 @@ class TransformersModel(Model): def __init__( self, - model_id: Optional[str] = None, - device_map: Optional[str] = None, - torch_dtype: Optional[str] = None, + model_id: str | None = None, + device_map: str | None = None, + torch_dtype: str | None = None, trust_remote_code: bool = False, **kwargs, ): - super().__init__(**kwargs) - if not is_torch_available() or not _is_package_available("transformers"): + try: + import torch + from transformers import ( + AutoModelForCausalLM, + AutoModelForImageTextToText, + AutoProcessor, + AutoTokenizer, + TextIteratorStreamer, + ) + except ModuleNotFoundError: raise ModuleNotFoundError( "Please install 'transformers' extra to use 'TransformersModel': `pip install 'smolagents[transformers]'`" ) - import torch - from transformers import AutoModelForCausalLM, AutoModelForImageTextToText, AutoProcessor, AutoTokenizer - default_model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct" - if model_id is None: - model_id = default_model_id - logger.warning(f"`model_id`not provided, using this default tokenizer for token counts: '{model_id}'") - self.model_id = model_id + if not model_id: + warnings.warn( + "The 'model_id' parameter will be required in version 2.0.0. " + "Please update your code to pass this parameter to avoid future errors. 
" + "For now, it defaults to 'HuggingFaceTB/SmolLM2-1.7B-Instruct'.", + FutureWarning, + ) + model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct" - default_max_tokens = 5000 + default_max_tokens = 4096 max_new_tokens = kwargs.get("max_new_tokens") or kwargs.get("max_tokens") if not max_new_tokens: kwargs["max_new_tokens"] = default_max_tokens logger.warning( f"`max_new_tokens` not provided, using this default value for `max_new_tokens`: {default_max_tokens}" ) - self.kwargs = kwargs if device_map is None: device_map = "cuda" if torch.cuda.is_available() else "cpu" logger.info(f"Using device: {device_map}") self._is_vlm = False try: - self.model = AutoModelForCausalLM.from_pretrained( + self.model = AutoModelForImageTextToText.from_pretrained( model_id, device_map=device_map, torch_dtype=torch_dtype, trust_remote_code=trust_remote_code, ) - self.tokenizer = AutoTokenizer.from_pretrained(model_id) + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code) + self._is_vlm = True + self.streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=True) # type: ignore + except ValueError as e: if "Unrecognized configuration class" in str(e): - self.model = AutoModelForImageTextToText.from_pretrained(model_id, device_map=device_map) - self.processor = AutoProcessor.from_pretrained(model_id) - self._is_vlm = True + self.model = AutoModelForCausalLM.from_pretrained( + model_id, + device_map=device_map, + torch_dtype=torch_dtype, + trust_remote_code=trust_remote_code, + ) + self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) + self.streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True) # type: ignore else: raise e except Exception as e: - logger.warning( - f"Failed to load tokenizer and model for {model_id=}: {e}. Loading default tokenizer and model instead from {default_model_id=}." 
- ) - self.model_id = default_model_id - self.tokenizer = AutoTokenizer.from_pretrained(default_model_id) - self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device_map, torch_dtype=torch_dtype) + raise ValueError(f"Failed to load tokenizer and model for {model_id=}: {e}") from e + super().__init__(flatten_messages_as_text=not self._is_vlm, model_id=model_id, **kwargs) - def make_stopping_criteria(self, stop_sequences: List[str], tokenizer) -> "StoppingCriteriaList": + def make_stopping_criteria(self, stop_sequences: list[str], tokenizer) -> "StoppingCriteriaList": from transformers import StoppingCriteria, StoppingCriteriaList class StopOnStrings(StoppingCriteria): - def __init__(self, stop_strings: List[str], tokenizer): + def __init__(self, stop_strings: list[str], tokenizer): self.stop_strings = stop_strings self.tokenizer = tokenizer self.stream = "" @@ -705,20 +797,18 @@ def __call__(self, input_ids, scores, **kwargs): return StoppingCriteriaList([StopOnStrings(stop_sequences, tokenizer)]) - def __call__( + def _prepare_completion_args( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, - images: Optional[List[Image.Image]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, **kwargs, - ) -> ChatMessage: + ) -> dict[str, Any]: completion_kwargs = self._prepare_completion_kwargs( messages=messages, stop_sequences=stop_sequences, grammar=grammar, - flatten_messages_as_text=(not self._is_vlm), **kwargs, ) @@ -730,144 +820,201 @@ def __call__( or kwargs.get("max_tokens") or self.kwargs.get("max_new_tokens") or self.kwargs.get("max_tokens") + or 1024 ) + prompt_tensor = (self.processor if hasattr(self, "processor") else self.tokenizer).apply_chat_template( + messages, # type: ignore + 
tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None, + return_tensors="pt", + add_generation_prompt=True if tools_to_call_from else False, + tokenize=True, + return_dict=True, + ) + prompt_tensor = prompt_tensor.to(self.model.device) # type: ignore + if hasattr(prompt_tensor, "input_ids"): + prompt_tensor = prompt_tensor["input_ids"] - if max_new_tokens: - completion_kwargs["max_new_tokens"] = max_new_tokens - - if hasattr(self, "processor"): - images = [Image.open(image) for image in images] if images else None - prompt_tensor = self.processor.apply_chat_template( - messages, - tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None, - return_tensors="pt", - tokenize=True, - return_dict=True, - images=images, - add_generation_prompt=True if tools_to_call_from else False, - ) - else: - prompt_tensor = self.tokenizer.apply_chat_template( - messages, - tools=[get_tool_json_schema(tool) for tool in tools_to_call_from] if tools_to_call_from else None, - return_tensors="pt", - return_dict=True, - add_generation_prompt=True if tools_to_call_from else False, - ) - - prompt_tensor = prompt_tensor.to(self.model.device) - count_prompt_tokens = prompt_tensor["input_ids"].shape[1] - - if stop_sequences: - stopping_criteria = self.make_stopping_criteria( - stop_sequences, tokenizer=self.processor if hasattr(self, "processor") else self.tokenizer - ) - else: - stopping_criteria = None - - out = self.model.generate( - **prompt_tensor, + model_tokenizer = self.processor.tokenizer if hasattr(self, "processor") else self.tokenizer + stopping_criteria = ( + self.make_stopping_criteria(stop_sequences, tokenizer=model_tokenizer) if stop_sequences else None + ) + completion_kwargs["max_new_tokens"] = max_new_tokens + return dict( + inputs=prompt_tensor, + use_cache=True, stopping_criteria=stopping_criteria, **completion_kwargs, ) + + def generate( + self, + messages: list[dict[str, str | 
list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> ChatMessage: + generation_kwargs = self._prepare_completion_args( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + **kwargs, + ) + count_prompt_tokens = generation_kwargs["inputs"].shape[1] # type: ignore + out = self.model.generate( + **generation_kwargs, + ) generated_tokens = out[0, count_prompt_tokens:] if hasattr(self, "processor"): - output = self.processor.decode(generated_tokens, skip_special_tokens=True) + output_text = self.processor.decode(generated_tokens, skip_special_tokens=True) else: - output = self.tokenizer.decode(generated_tokens, skip_special_tokens=True) + output_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True) self.last_input_token_count = count_prompt_tokens self.last_output_token_count = len(generated_tokens) if stop_sequences is not None: - output = remove_stop_sequences(output, stop_sequences) + output_text = remove_stop_sequences(output_text, stop_sequences) + + return ChatMessage( + role=MessageRole.ASSISTANT, + content=output_text, + raw={ + "out": output_text, + "completion_kwargs": {key: value for key, value in generation_kwargs.items() if key != "inputs"}, + }, + ) - if tools_to_call_from is None: - return ChatMessage( - role="assistant", - content=output, - raw={"out": out, "completion_kwargs": completion_kwargs}, - ) - else: - if "Action:" in output: - output = output.split("Action:", 1)[1].strip() - try: - start_index = output.index("{") - end_index = output.rindex("}") - output = output[start_index : end_index + 1] - except Exception as e: - raise Exception("No json blob found in output!") from e - - try: - parsed_output = json.loads(output) - except json.JSONDecodeError as e: - raise ValueError(f"Tool call '{output}' has an invalid JSON structure: {e}") - tool_name = 
parsed_output.get("name") - tool_arguments = parsed_output.get("arguments") - return ChatMessage( - role="assistant", - content="", - tool_calls=[ - ChatMessageToolCall( - id="".join(random.choices("0123456789", k=5)), - type="function", - function=ChatMessageToolCallDefinition(name=tool_name, arguments=tool_arguments), - ) - ], - raw={"out": out, "completion_kwargs": completion_kwargs}, - ) + def generate_stream( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> Generator: + generation_kwargs = self._prepare_completion_args( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + **kwargs, + ) + count_prompt_tokens = generation_kwargs["inputs"].shape[1] # type: ignore + + thread = Thread(target=self.model.generate, kwargs={"streamer": self.streamer, **generation_kwargs}) + thread.start() + + self.last_output_token_count = 0 + + # Generate with streaming + for new_text in self.streamer: + yield CompletionDelta(content=new_text, tool_calls=None) + self.last_output_token_count += 1 + self.last_input_token_count = count_prompt_tokens + thread.join() + + +class ApiModel(Model): + """ + Base class for API-based language models. -class LiteLLMModel(Model): - """This model connects to [LiteLLM](https://www.litellm.ai/) as a gateway to hundreds of LLMs. + This class serves as a foundation for implementing models that interact with + external APIs. It handles the common functionality for managing model IDs, + custom role mappings, and API client connections. + + Parameters: + model_id (`str`): + The identifier for the model to be used with the API. + custom_role_conversions (`dict[str, str`], **optional**): + Mapping to convert between internal role names and API-specific role names. Defaults to None. + client (`Any`, **optional**): + Pre-configured API client instance. 
If not provided, a default client will be created. Defaults to None. + **kwargs: Additional keyword arguments to pass to the parent class. + """ + + def __init__( + self, model_id: str, custom_role_conversions: dict[str, str] | None = None, client: Any | None = None, **kwargs + ): + super().__init__(model_id=model_id, **kwargs) + self.custom_role_conversions = custom_role_conversions or {} + self.client = client or self.create_client() + + def create_client(self): + """Create the API client for the specific service.""" + raise NotImplementedError("Subclasses must implement this method to create a client") + + +class LiteLLMModel(ApiModel): + """Model to use [LiteLLM Python SDK](https://docs.litellm.ai/docs/#litellm-python-sdk) to access hundreds of LLMs. Parameters: model_id (`str`): The model identifier to use on the server (e.g. "gpt-3.5-turbo"). api_base (`str`, *optional*): - The base URL of the OpenAI-compatible API server. + The base URL of the provider API to call the model. api_key (`str`, *optional*): The API key to use for authentication. custom_role_conversions (`dict[str, str]`, *optional*): Custom role conversion mapping to convert message roles in others. Useful for specific models that do not support specific message roles like "system". + flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text. + Defaults to `True` for models that start with "ollama", "groq", "cerebras". **kwargs: Additional keyword arguments to pass to the OpenAI API. 
""" def __init__( self, - model_id: str = "anthropic/claude-3-5-sonnet-20240620", - api_base=None, - api_key=None, - custom_role_conversions: Optional[Dict[str, str]] = None, + model_id: str | None = None, + api_base: str | None = None, + api_key: str | None = None, + custom_role_conversions: dict[str, str] | None = None, + flatten_messages_as_text: bool | None = None, **kwargs, ): - super().__init__(**kwargs) - self.model_id = model_id + if not model_id: + warnings.warn( + "The 'model_id' parameter will be required in version 2.0.0. " + "Please update your code to pass this parameter to avoid future errors. " + "For now, it defaults to 'anthropic/claude-3-5-sonnet-20240620'.", + FutureWarning, + ) + model_id = "anthropic/claude-3-5-sonnet-20240620" self.api_base = api_base self.api_key = api_key - self.custom_role_conversions = custom_role_conversions - self.flatten_messages_as_text = ( - kwargs.get("flatten_messages_as_text") - if "flatten_messages_as_text" in kwargs - else self.model_id.startswith(("ollama", "groq", "cerebras")) + flatten_messages_as_text = ( + flatten_messages_as_text + if flatten_messages_as_text is not None + else model_id.startswith(("ollama", "groq", "cerebras")) + ) + super().__init__( + model_id=model_id, + custom_role_conversions=custom_role_conversions, + flatten_messages_as_text=flatten_messages_as_text, + **kwargs, ) - def __call__( - self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, - **kwargs, - ) -> ChatMessage: + def create_client(self): + """Create the LiteLLM client.""" try: import litellm - except ModuleNotFoundError: + except ModuleNotFoundError as e: raise ModuleNotFoundError( "Please install 'litellm' extra to use LiteLLMModel: `pip install 'smolagents[litellm]'`" - ) + ) from e + + return litellm + def generate( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = 
None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> ChatMessage: completion_kwargs = self._prepare_completion_kwargs( messages=messages, stop_sequences=stop_sequences, @@ -877,26 +1024,301 @@ def __call__( api_base=self.api_base, api_key=self.api_key, convert_images_to_image_urls=True, - flatten_messages_as_text=self.flatten_messages_as_text, custom_role_conversions=self.custom_role_conversions, **kwargs, ) - response = litellm.completion(**completion_kwargs) + response = self.client.completion(**completion_kwargs) self.last_input_token_count = response.usage.prompt_tokens self.last_output_token_count = response.usage.completion_tokens - message = ChatMessage.from_dict( - response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}) + return ChatMessage.from_dict( + response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}), + raw=response, ) - message.raw = response - if tools_to_call_from is not None: - return parse_tool_args_if_needed(message) - return message + def generate_stream( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> Generator: + if tools_to_call_from: + raise NotImplementedError("Streaming is not yet supported for tool calling") + completion_kwargs = self._prepare_completion_kwargs( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + model=self.model_id, + custom_role_conversions=self.custom_role_conversions, + convert_images_to_image_urls=True, + **kwargs, + ) + for event in self.client.completion(**completion_kwargs, stream=True, stream_options={"include_usage": True}): + if event.choices: + if event.choices[0].delta is None: + if not getattr(event.choices[0], "finish_reason", None): + raise ValueError(f"No content or tool calls in event: 
{event}") + else: + yield CompletionDelta( + content=event.choices[0].delta.content, + ) + if getattr(event, "usage", None): + self.last_input_token_count = event.usage.prompt_tokens + self.last_output_token_count = event.usage.completion_tokens -class OpenAIServerModel(Model): +class LiteLLMRouterModel(LiteLLMModel): + """Routerโ€‘based client for interacting with the [LiteLLM Python SDK Router](https://docs.litellm.ai/docs/routing). + + This class provides a high-level interface for distributing requests among multiple language models using + the LiteLLM SDK's routing capabilities. It is responsible for initializing and configuring the router client, + applying custom role conversions, and managing message formatting to ensure seamless integration with various LLMs. + + Parameters: + model_id (`str`): + Identifier for the model group to use from the model list (e.g., "model-group-1"). + model_list (`list[dict[str, Any]]`): + Model configurations to be used for routing. + Each configuration should include the model group name and any necessary parameters. + For more details, refer to the [LiteLLM Routing](https://docs.litellm.ai/docs/routing#quick-start) documentation. + client_kwargs (`dict[str, Any]`, *optional*): + Additional configuration parameters for the Router client. For more details, see the + [LiteLLM Routing Configurations](https://docs.litellm.ai/docs/routing). + custom_role_conversions (`dict[str, str]`, *optional*): + Custom role conversion mapping to convert message roles in others. + Useful for specific models that do not support specific message roles like "system". + flatten_messages_as_text (`bool`, *optional*): Whether to flatten messages as text. + Defaults to `True` for models that start with "ollama", "groq", "cerebras". + **kwargs: + Additional keyword arguments to pass to the LiteLLM Router completion method. 
+ + Example: + ```python + >>> import os + >>> from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel + >>> os.environ["OPENAI_API_KEY"] = "" + >>> os.environ["AWS_ACCESS_KEY_ID"] = "" + >>> os.environ["AWS_SECRET_ACCESS_KEY"] = "" + >>> os.environ["AWS_REGION"] = "" + >>> llm_loadbalancer_model_list = [ + ... { + ... "model_name": "model-group-1", + ... "litellm_params": { + ... "model": "gpt-4o-mini", + ... "api_key": os.getenv("OPENAI_API_KEY"), + ... }, + ... }, + ... { + ... "model_name": "model-group-1", + ... "litellm_params": { + ... "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + ... "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), + ... "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), + ... "aws_region_name": os.getenv("AWS_REGION"), + ... }, + ... }, + >>> ] + >>> model = LiteLLMRouterModel( + ... model_id="model-group-1", + ... model_list=llm_loadbalancer_model_list, + ... client_kwargs={ + ... "routing_strategy":"simple-shuffle" + ... 
} + >>> ) + >>> agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) + >>> agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") + ``` + """ + + def __init__( + self, + model_id: str, + model_list: list[dict[str, Any]], + client_kwargs: dict[str, Any] | None = None, + custom_role_conversions: dict[str, str] | None = None, + flatten_messages_as_text: bool | None = None, + **kwargs, + ): + self.client_kwargs = { + "model_list": model_list, + **(client_kwargs or {}), + } + super().__init__( + model_id=model_id, + custom_role_conversions=custom_role_conversions, + flatten_messages_as_text=flatten_messages_as_text, + **kwargs, + ) + + def create_client(self): + try: + from litellm import Router + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "Please install 'litellm' extra to use LiteLLMRouterModel: `pip install 'smolagents[litellm]'`" + ) from e + return Router(**self.client_kwargs) + + +class InferenceClientModel(ApiModel): + """A class to interact with Hugging Face's Inference Providers for language model interaction. + + This model allows you to communicate with Hugging Face's models using Inference Providers. It can be used in both serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization. + + Providers include Cerebras, Cohere, Fal, Fireworks, HF-Inference, Hyperbolic, Nebius, Novita, Replicate, SambaNova, Together, and more. + + Parameters: + model_id (`str`, *optional*, default `"Qwen/Qwen2.5-Coder-32B-Instruct"`): + The Hugging Face model ID to be used for inference. + This can be a model identifier from the Hugging Face model hub or a URL to a deployed Inference Endpoint. + Currently, it defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`, but this may change in the future. + provider (`str`, *optional*): + Name of the provider to use for inference. 
Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, "sambanova"`, `"together"`, etc. + Currently, it defaults to hf-inference (HF Inference API). + token (`str`, *optional*): + Token used by the Hugging Face API for authentication. This token need to be authorized 'Make calls to the serverless Inference Providers'. + If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'. + If not provided, the class will try to use environment variable 'HF_TOKEN', else use the token stored in the Hugging Face CLI configuration. + timeout (`int`, *optional*, defaults to 120): + Timeout for the API request, in seconds. + client_kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments to pass to the Hugging Face InferenceClient. + custom_role_conversions (`dict[str, str]`, *optional*): + Custom role conversion mapping to convert message roles in others. + Useful for specific models that do not support specific message roles like "system". + api_key (`str`, *optional*): + Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClientModel`] + follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None. + **kwargs: + Additional keyword arguments to pass to the Hugging Face API. + + Raises: + ValueError: + If the model name is not provided. + + Example: + ```python + >>> engine = InferenceClientModel( + ... model_id="Qwen/Qwen2.5-Coder-32B-Instruct", + ... provider="together", + ... token="your_hf_token_here", + ... max_tokens=5000, + ... ) + >>> messages = [{"role": "user", "content": "Explain quantum mechanics in simple terms."}] + >>> response = engine(messages, stop_sequences=["END"]) + >>> print(response) + "Quantum mechanics is the branch of physics that studies..." 
+ ``` + """ + + def __init__( + self, + model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct", + provider: str | None = None, + token: str | None = None, + timeout: int = 120, + client_kwargs: dict[str, Any] | None = None, + custom_role_conversions: dict[str, str] | None = None, + api_key: str | None = None, + **kwargs, + ): + if token is not None and api_key is not None: + raise ValueError( + "Received both `token` and `api_key` arguments. Please provide only one of them." + " `api_key` is an alias for `token` to make the API compatible with OpenAI's client." + " It has the exact same behavior as `token`." + ) + token = token if token is not None else api_key + if token is None: + token = os.getenv("HF_TOKEN") + self.client_kwargs = { + **(client_kwargs or {}), + "model": model_id, + "provider": provider, + "token": token, + "timeout": timeout, + } + super().__init__(model_id=model_id, custom_role_conversions=custom_role_conversions, **kwargs) + + def create_client(self): + """Create the Hugging Face client.""" + from huggingface_hub import InferenceClient + + return InferenceClient(**self.client_kwargs) + + def generate( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> ChatMessage: + completion_kwargs = self._prepare_completion_kwargs( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + convert_images_to_image_urls=True, + custom_role_conversions=self.custom_role_conversions, + **kwargs, + ) + response = self.client.chat_completion(**completion_kwargs) + + self.last_input_token_count = response.usage.prompt_tokens + self.last_output_token_count = response.usage.completion_tokens + return ChatMessage.from_dict(asdict(response.choices[0].message), raw=response) + + def generate_stream( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: 
list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> Generator: + if tools_to_call_from: + raise NotImplementedError("Streaming is not yet supported for tool calling") + completion_kwargs = self._prepare_completion_kwargs( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + model=self.model_id, + custom_role_conversions=self.custom_role_conversions, + convert_images_to_image_urls=True, + **kwargs, + ) + for event in self.client.chat.completions.create( + **completion_kwargs, stream=True, stream_options={"include_usage": True} + ): + if event.choices: + if event.choices[0].delta is None: + if not getattr(event.choices[0], "finish_reason", None): + raise ValueError(f"No content or tool calls in event: {event}") + else: + yield CompletionDelta( + content=event.choices[0].delta.content, + ) + if getattr(event, "usage", None): + self.last_input_token_count = event.usage.prompt_tokens + self.last_output_token_count = event.usage.completion_tokens + + +class HfApiModel(InferenceClientModel): + def __new__(cls, *args, **kwargs): + warnings.warn( + "HfApiModel has been renamed to InferenceClientModel to more closely follow the name of the underlying Inference library.", + DeprecationWarning, + ) + return super().__new__(cls) + + +class OpenAIServerModel(ApiModel): """This model connects to an OpenAI-compatible API server. Parameters: @@ -915,6 +1337,8 @@ class OpenAIServerModel(Model): custom_role_conversions (`dict[str, str]`, *optional*): Custom role conversion mapping to convert message roles in others. Useful for specific models that do not support specific message roles like "system". + flatten_messages_as_text (`bool`, default `False`): + Whether to flatten messages as text. **kwargs: Additional keyword arguments to pass to the OpenAI API. 
""" @@ -922,38 +1346,80 @@ class OpenAIServerModel(Model): def __init__( self, model_id: str, - api_base: Optional[str] = None, - api_key: Optional[str] = None, - organization: Optional[str] | None = None, - project: Optional[str] | None = None, - client_kwargs: Optional[Dict[str, Any]] = None, - custom_role_conversions: Optional[Dict[str, str]] = None, + api_base: str | None = None, + api_key: str | None = None, + organization: str | None = None, + project: str | None = None, + client_kwargs: dict[str, Any] | None = None, + custom_role_conversions: dict[str, str] | None = None, + flatten_messages_as_text: bool = False, **kwargs, ): + self.client_kwargs = { + **(client_kwargs or {}), + "api_key": api_key, + "base_url": api_base, + "organization": organization, + "project": project, + } + super().__init__( + model_id=model_id, + custom_role_conversions=custom_role_conversions, + flatten_messages_as_text=flatten_messages_as_text, + **kwargs, + ) + + def create_client(self): try: import openai - except ModuleNotFoundError: + except ModuleNotFoundError as e: raise ModuleNotFoundError( "Please install 'openai' extra to use OpenAIServerModel: `pip install 'smolagents[openai]'`" - ) from None + ) from e - super().__init__(**kwargs) - self.model_id = model_id - self.client = openai.OpenAI( - base_url=api_base, - api_key=api_key, - organization=organization, - project=project, - **(client_kwargs or {}), + return openai.OpenAI(**self.client_kwargs) + + def generate_stream( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> Generator: + if tools_to_call_from: + raise NotImplementedError("Streaming is not yet supported for tool calling") + completion_kwargs = self._prepare_completion_kwargs( + messages=messages, + stop_sequences=stop_sequences, + grammar=grammar, + tools_to_call_from=tools_to_call_from, + model=self.model_id, + 
custom_role_conversions=self.custom_role_conversions, + convert_images_to_image_urls=True, + **kwargs, ) - self.custom_role_conversions = custom_role_conversions + for event in self.client.chat.completions.create( + **completion_kwargs, stream=True, stream_options={"include_usage": True} + ): + if event.choices: + if event.choices[0].delta is None: + if not getattr(event.choices[0], "finish_reason", None): + raise ValueError(f"No content or tool calls in event: {event}") + else: + yield CompletionDelta( + content=event.choices[0].delta.content, + ) + if getattr(event, "usage", None): + self.last_input_token_count = event.usage.prompt_tokens + self.last_output_token_count = event.usage.completion_tokens - def __call__( + def generate( self, - messages: List[Dict[str, str]], - stop_sequences: Optional[List[str]] = None, - grammar: Optional[str] = None, - tools_to_call_from: Optional[List[Tool]] = None, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, **kwargs, ) -> ChatMessage: completion_kwargs = self._prepare_completion_kwargs( @@ -970,13 +1436,10 @@ def __call__( self.last_input_token_count = response.usage.prompt_tokens self.last_output_token_count = response.usage.completion_tokens - message = ChatMessage.from_dict( - response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}) + return ChatMessage.from_dict( + response.choices[0].message.model_dump(include={"role", "content", "tool_calls"}), + raw=response, ) - message.raw = response - if tools_to_call_from is not None: - return parse_tool_args_if_needed(message) - return message class AzureOpenAIServerModel(OpenAIServerModel): @@ -991,6 +1454,8 @@ class AzureOpenAIServerModel(OpenAIServerModel): The API key to use for authentication. If not provided, it will be inferred from the `AZURE_OPENAI_API_KEY` environment variable. 
api_version (`str`, *optional*): The API version to use. If not provided, it will be inferred from the `OPENAI_API_VERSION` environment variable. + client_kwargs (`dict[str, Any]`, *optional*): + Additional keyword arguments to pass to the AzureOpenAI client (like organization, project, max_retries etc.). custom_role_conversions (`dict[str, str]`, *optional*): Custom role conversion mapping to convert message roles in others. Useful for specific models that do not support specific message roles like "system". @@ -1001,21 +1466,207 @@ class AzureOpenAIServerModel(OpenAIServerModel): def __init__( self, model_id: str, - azure_endpoint: Optional[str] = None, - api_key: Optional[str] = None, - api_version: Optional[str] = None, - custom_role_conversions: Optional[Dict[str, str]] = None, + azure_endpoint: str | None = None, + api_key: str | None = None, + api_version: str | None = None, + client_kwargs: dict[str, Any] | None = None, + custom_role_conversions: dict[str, str] | None = None, + **kwargs, + ): + client_kwargs = client_kwargs or {} + client_kwargs.update( + { + "api_version": api_version, + "azure_endpoint": azure_endpoint, + } + ) + super().__init__( + model_id=model_id, + api_key=api_key, + client_kwargs=client_kwargs, + custom_role_conversions=custom_role_conversions, + **kwargs, + ) + + def create_client(self): + try: + import openai + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "Please install 'openai' extra to use AzureOpenAIServerModel: `pip install 'smolagents[openai]'`" + ) from e + + return openai.AzureOpenAI(**self.client_kwargs) + + +class AmazonBedrockServerModel(ApiModel): + """ + A model class for interacting with Amazon Bedrock Server models through the Bedrock API. + + This class provides an interface to interact with various Bedrock language models, + allowing for customized model inference, guardrail configuration, message handling, + and other parameters allowed by boto3 API. 
+ + Parameters: + model_id (`str`): + The model identifier to use on Bedrock (e.g. "us.amazon.nova-pro-v1:0"). + client (`boto3.client`, *optional*): + A custom boto3 client for AWS interactions. If not provided, a default client will be created. + client_kwargs (dict[str, Any], *optional*): + Keyword arguments used to configure the boto3 client if it needs to be created internally. + Examples include `region_name`, `config`, or `endpoint_url`. + custom_role_conversions (`dict[str, str]`, *optional*): + Custom role conversion mapping to convert message roles in others. + Useful for specific models that do not support specific message roles like "system". + Defaults to converting all roles to "user" role to enable using all the Bedrock models. + flatten_messages_as_text (`bool`, default `False`): + Whether to flatten messages as text. + **kwargs + Additional keyword arguments passed directly to the underlying API calls. + + Example: + Creating a model instance with default settings: + >>> bedrock_model = AmazonBedrockServerModel( + ... model_id='us.amazon.nova-pro-v1:0' + ... ) + + Creating a model instance with a custom boto3 client: + >>> import boto3 + >>> client = boto3.client('bedrock-runtime', region_name='us-west-2') + >>> bedrock_model = AmazonBedrockServerModel( + ... model_id='us.amazon.nova-pro-v1:0', + ... client=client + ... ) + + Creating a model instance with client_kwargs for internal client creation: + >>> bedrock_model = AmazonBedrockServerModel( + ... model_id='us.amazon.nova-pro-v1:0', + ... client_kwargs={'region_name': 'us-west-2', 'endpoint_url': 'https://custom-endpoint.com'} + ... ) + + Creating a model instance with inference and guardrail configurations: + >>> additional_api_config = { + ... "inferenceConfig": { + ... "maxTokens": 3000 + ... }, + ... "guardrailConfig": { + ... "guardrailIdentifier": "identify1", + ... "guardrailVersion": 'v1' + ... }, + ... } + >>> bedrock_model = AmazonBedrockServerModel( + ... 
model_id='anthropic.claude-3-haiku-20240307-v1:0', + ... **additional_api_config + ... ) + """ + + def __init__( + self, + model_id: str, + client=None, + client_kwargs: dict[str, Any] | None = None, + custom_role_conversions: dict[str, str] | None = None, **kwargs, ): - # read the api key manually, to avoid super().__init__() trying to use the wrong api_key (OPENAI_API_KEY) - if api_key is None: - api_key = os.environ.get("AZURE_OPENAI_API_KEY") + self.client_kwargs = client_kwargs or {} + + # Bedrock only supports `assistant` and `user` roles. + # Many Bedrock models do not allow conversations to start with the `assistant` role, so the default is set to `user/user`. + # This parameter is retained for future model implementations and extended support. + custom_role_conversions = custom_role_conversions or { + MessageRole.SYSTEM: MessageRole.USER, + MessageRole.ASSISTANT: MessageRole.USER, + MessageRole.TOOL_CALL: MessageRole.USER, + MessageRole.TOOL_RESPONSE: MessageRole.USER, + } + + super().__init__( + model_id=model_id, + custom_role_conversions=custom_role_conversions, + flatten_messages_as_text=False, # Bedrock API doesn't support flatten messages, must be a list of messages + client=client, + **kwargs, + ) + + def _prepare_completion_kwargs( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + custom_role_conversions: dict[str, str] | None = None, + convert_images_to_image_urls: bool = False, + **kwargs, + ) -> dict: + """ + Overrides the base method to handle Bedrock-specific configurations. + + This implementation adapts the completion keyword arguments to align with + Bedrock's requirements, ensuring compatibility with its unique setup and + constraints. 
+ """ + completion_kwargs = super()._prepare_completion_kwargs( + messages=messages, + stop_sequences=None, # Bedrock support stop_sequence using Inference Config + grammar=None, # Bedrock doesn't support grammar + tools_to_call_from=tools_to_call_from, + custom_role_conversions=custom_role_conversions, + convert_images_to_image_urls=convert_images_to_image_urls, + **kwargs, + ) + + # Not all models in Bedrock support `toolConfig`. Also, smolagents already include the tool call in the prompt, + # so adding `toolConfig` could cause conflicts. We remove it to avoid issues. + completion_kwargs.pop("toolConfig", None) + + # The Bedrock API does not support the `type` key in requests. + # This block of code modifies the object to meet Bedrock's requirements. + for message in completion_kwargs.get("messages", []): + for content in message.get("content", []): + if "type" in content: + del content["type"] + + return { + "modelId": self.model_id, + **completion_kwargs, + } + + def create_client(self): + try: + import boto3 # type: ignore + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "Please install 'bedrock' extra to use AmazonBedrockServerModel: `pip install 'smolagents[bedrock]'`" + ) from e + + return boto3.client("bedrock-runtime", **self.client_kwargs) + + def generate( + self, + messages: list[dict[str, str | list[dict]]], + stop_sequences: list[str] | None = None, + grammar: str | None = None, + tools_to_call_from: list[Tool] | None = None, + **kwargs, + ) -> ChatMessage: + completion_kwargs: dict = self._prepare_completion_kwargs( + messages=messages, + tools_to_call_from=tools_to_call_from, + custom_role_conversions=self.custom_role_conversions, + convert_images_to_image_urls=True, + **kwargs, + ) + + # self.client is created in ApiModel class + response = self.client.converse(**completion_kwargs) - super().__init__(model_id=model_id, api_key=api_key, custom_role_conversions=custom_role_conversions, **kwargs) - # if we've reached this point, it 
means the openai package is available (checked in baseclass) so go ahead and import it - import openai + # Get usage + self.last_input_token_count = response["usage"]["inputTokens"] + self.last_output_token_count = response["usage"]["outputTokens"] - self.client = openai.AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint) + # Get first message + response["output"]["message"]["content"] = response["output"]["message"]["content"][0]["text"] + return ChatMessage.from_dict(response["output"]["message"], raw=response) __all__ = [ @@ -1025,9 +1676,14 @@ def __init__( "Model", "MLXModel", "TransformersModel", + "ApiModel", + "InferenceClientModel", "HfApiModel", "LiteLLMModel", + "LiteLLMRouterModel", "OpenAIServerModel", + "VLLMModel", "AzureOpenAIServerModel", + "AmazonBedrockServerModel", "ChatMessage", ] diff --git a/src/smolagents/monitoring.py b/src/smolagents/monitoring.py index d7deb4403..0d827a95e 100644 --- a/src/smolagents/monitoring.py +++ b/src/smolagents/monitoring.py @@ -16,7 +16,6 @@ # limitations under the License. import json from enum import IntEnum -from typing import List, Optional from rich import box from rich.console import Console, Group @@ -27,6 +26,8 @@ from rich.text import Text from rich.tree import Tree +from smolagents.utils import escape_code_brackets + __all__ = ["AgentLogger", "LogLevel", "Monitor"] @@ -82,11 +83,14 @@ class LogLevel(IntEnum): class AgentLogger: - def __init__(self, level: LogLevel = LogLevel.INFO): + def __init__(self, level: LogLevel = LogLevel.INFO, console: Console | None = None): self.level = level - self.console = Console() + if console is None: + self.console = Console() + else: + self.console = console - def log(self, *args, level: str | LogLevel = LogLevel.INFO, **kwargs) -> None: + def log(self, *args, level: int | str | LogLevel = LogLevel.INFO, **kwargs) -> None: """Logs a message to the console. 
Args: @@ -97,7 +101,10 @@ def log(self, *args, level: str | LogLevel = LogLevel.INFO, **kwargs) -> None: if level <= self.level: self.console.print(*args, **kwargs) - def log_markdown(self, content: str, title: Optional[str] = None, level=LogLevel.INFO, style=YELLOW_HEX) -> None: + def log_error(self, error_message: str) -> None: + self.log(escape_code_brackets(error_message), style="bold red", level=LogLevel.ERROR) + + def log_markdown(self, content: str, title: str | None = None, level=LogLevel.INFO, style=YELLOW_HEX) -> None: markdown_content = Syntax( content, lexer="markdown", @@ -145,10 +152,10 @@ def log_rule(self, title: str, level: int = LogLevel.INFO) -> None: level=LogLevel.INFO, ) - def log_task(self, content: str, subtitle: str, title: Optional[str] = None, level: int = LogLevel.INFO) -> None: + def log_task(self, content: str, subtitle: str, title: str | None = None, level: LogLevel = LogLevel.INFO) -> None: self.log( Panel( - f"\n[bold]{content}\n", + f"\n[bold]{escape_code_brackets(content)}\n", title="[bold]New run" + (f" - {title}" if title else ""), subtitle=subtitle, border_style=YELLOW_HEX, @@ -157,7 +164,7 @@ def log_task(self, content: str, subtitle: str, title: Optional[str] = None, lev level=level, ) - def log_messages(self, messages: List) -> None: + def log_messages(self, messages: list[dict], level: LogLevel = LogLevel.DEBUG) -> None: messages_as_string = "\n".join([json.dumps(dict(message), indent=4) for message in messages]) self.log( Syntax( @@ -165,7 +172,8 @@ def log_messages(self, messages: List) -> None: lexer="markdown", theme="github-dark", word_wrap=True, - ) + ), + level=level, ) def visualize_agent_tree(self, agent): @@ -184,7 +192,7 @@ def create_tools_section(tools_dict): return Group("๐Ÿ› ๏ธ [italic #1E90FF]Tools:[/italic #1E90FF]", table) - def get_agent_headline(agent, name: Optional[str] = None): + def get_agent_headline(agent, name: str | None = None): name_headline = f"{name} | " if name else "" return f"[bold 
{YELLOW_HEX}]{name_headline}{agent.__class__.__name__} | {agent.model.model_id}" diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml index b7388e207..29294601a 100644 --- a/src/smolagents/prompts/code_agent.yaml +++ b/src/smolagents/prompts/code_agent.yaml @@ -141,22 +141,31 @@ system_prompt: |- final_answer(pope_current_age) ``` - Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: + Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools, behaving like regular python functions: + ```python {%- for tool in tools.values() %} - - {{ tool.name }}: {{ tool.description }} - Takes inputs: {{tool.inputs}} - Returns an output of type: {{tool.output_type}} - {%- endfor %} + def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}: + """{{ tool.description }} + + Args: + {%- for arg_name, arg_info in tool.inputs.items() %} + {{ arg_name }}: {{ arg_info.description }} + {%- endfor %} + """ + {% endfor %} + ``` {%- if managed_agents and managed_agents.values() | list %} You can also give tasks to team members. - Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. - Given that this team member is a real human, you should be very verbose in your task. + Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'. + Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary. 
Here is a list of the team members that you can call: + ```python {%- for agent in managed_agents.values() %} - - {{ agent.name }}: {{ agent.description }} - {%- endfor %} - {%- else %} + def {{ agent.name }}("Your query goes here.") -> str: + """{{ agent.description }}""" + {% endfor %} + ``` {%- endif %} Here are the rules you should always follow to solve your task: @@ -171,140 +180,123 @@ system_prompt: |- 9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist. 10. Don't give up! You're in charge of solving the task, not providing directions to solve it. - Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. + Now Begin! planning: - initial_facts: |- - Below I will present you a task. - - You will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need. - To do so, you will have to read the task and identify things that must be discovered in order to successfully complete it. - Don't make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey: + initial_plan : |- + You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task. + Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task. - --- - ### 1. Facts given in the task + ## 1. Facts survey + You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need. + These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings: + ### 1.1. Facts given in the task List here the specific facts given in the task that could help you (there might be nothing here). - ### 2. Facts to look up + ### 1.2. 
Facts to look up List here any facts that we may need to look up. Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here. - ### 3. Facts to derive + ### 1.3. Facts to derive List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation. - Keep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings: - ### 1. Facts given in the task - ### 2. Facts to look up - ### 3. Facts to derive - Do not add anything else. + Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above. - Here is the task: - ``` - {{task}} - ``` - Now begin! - initial_plan : |- - You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools. - - Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. + ## 2. Plan + Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. After writing the final step of the plan, write the '\n' tag and stop there. 
- Here is your task: - - Task: - ``` - {{task}} - ``` - You can leverage these tools: + You can leverage these tools, behaving like regular python functions: + ```python {%- for tool in tools.values() %} - - {{ tool.name }}: {{ tool.description }} - Takes inputs: {{tool.inputs}} - Returns an output of type: {{tool.output_type}} - {%- endfor %} + def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}: + """{{ tool.description }} + + Args: + {%- for arg_name, arg_info in tool.inputs.items() %} + {{ arg_name }}: {{ arg_info.description }} + {%- endfor %} + """ + {% endfor %} + ``` {%- if managed_agents and managed_agents.values() | list %} You can also give tasks to team members. - Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task. - Given that this team member is a real human, you should be very verbose in your task. + Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'. + Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary. Here is a list of the team members that you can call: + ```python {%- for agent in managed_agents.values() %} - - {{ agent.name }}: {{ agent.description }} - {%- endfor %} - {%- else %} + def {{ agent.name }}("Your query goes here.") -> str: + """{{ agent.description }}""" + {% endfor %} + ``` {%- endif %} - List of facts that you know: + --- + Now begin! Here is your task: ``` - {{answer_facts}} + {{task}} ``` - - Now begin! Write your plan below. - update_facts_pre_messages: |- - You are a world expert at gathering known and unknown facts based on a conversation. - Below you will find a task, and a history of attempts made to solve the task. 
You will have to produce a list of these: - ### 1. Facts given in the task - ### 2. Facts that we have learned - ### 3. Facts still to look up - ### 4. Facts still to derive - Find the task and history below: - update_facts_post_messages: |- - Earlier we've built a list of facts. - But since in your previous steps you may have learned useful new facts or invalidated some false ones. - Please update your list of facts based on the previous history, and provide these headings: - ### 1. Facts given in the task - ### 2. Facts that we have learned - ### 3. Facts still to look up - ### 4. Facts still to derive - - Now write your new list of facts below. + First in part 1, write the facts survey, then in part 2, write your plan. update_plan_pre_messages: |- - You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools. - - You have been given a task: + You are a world expert at analyzing a situation, and plan accordingly towards solving a task. + You have been given the following task: ``` {{task}} ``` - - Find below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task. - If the previous tries so far have met some success, you can make an updated plan based on these actions. + + Below you will find a history of attempts made to solve this task. + You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task. + If the previous tries so far have met some success, your updated plan can build on these results. If you are stalled, you can make a completely new plan starting from scratch. + + Find the task and history below: update_plan_post_messages: |- - You're still working towards solving this task: - ``` - {{task}} - ``` + Now write your updated facts below, taking into account the above history: + ## 1. Updated facts survey + ### 1.1. Facts given in the task + ### 1.2. 
Facts that we have learned + ### 1.3. Facts still to look up + ### 1.4. Facts still to derive + + Then write a step-by-step high-level plan to solve the task above. + ## 2. Plan + ### 2. 1. ... + Etc. + This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. + Beware that you have {remaining_steps} steps remaining. + Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. + After writing the final step of the plan, write the '\n' tag and stop there. - You can leverage these tools: + You can leverage these tools, behaving like regular python functions: + ```python {%- for tool in tools.values() %} - - {{ tool.name }}: {{ tool.description }} - Takes inputs: {{tool.inputs}} - Returns an output of type: {{tool.output_type}} - {%- endfor %} + def {{ tool.name }}({% for arg_name, arg_info in tool.inputs.items() %}{{ arg_name }}: {{ arg_info.type }}{% if not loop.last %}, {% endif %}{% endfor %}) -> {{tool.output_type}}: + """{{ tool.description }} + + Args: + {%- for arg_name, arg_info in tool.inputs.items() %} + {{ arg_name }}: {{ arg_info.description }} + {%- endfor %}""" + {% endfor %} + ``` {%- if managed_agents and managed_agents.values() | list %} You can also give tasks to team members. Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task'. Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary. 
Here is a list of the team members that you can call: + ```python {%- for agent in managed_agents.values() %} - - {{ agent.name }}: {{ agent.description }} - {%- endfor %} - {%- else %} - {%- endif %} - - Here is the up to date list of facts that you know: + def {{ agent.name }}("Your query goes here.") -> str: + """{{ agent.description }}""" + {% endfor %} ``` - {{facts_update}} - ``` - - Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. - This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. - Beware that you have {remaining_steps} steps remaining. - Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. - After writing the final step of the plan, write the '\n' tag and stop there. + {%- endif %} - Now write your new plan below. + Now write your updated facts survey below, then your new plan. managed_agent: task: |- You're a helpful agent named '{{name}}'. diff --git a/src/smolagents/prompts/toolcalling_agent.yaml b/src/smolagents/prompts/toolcalling_agent.yaml index 744bd7451..3e99fb423 100644 --- a/src/smolagents/prompts/toolcalling_agent.yaml +++ b/src/smolagents/prompts/toolcalling_agent.yaml @@ -1,5 +1,5 @@ system_prompt: |- - You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can. + You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can. To do so, you have been given access to some tools. The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation". 
@@ -104,7 +104,6 @@ system_prompt: |- {%- for agent in managed_agents.values() %} - {{ agent.name }}: {{ agent.description }} {%- endfor %} - {%- else %} {%- endif %} Here are the rules you should always follow to solve your task: @@ -114,51 +113,33 @@ system_prompt: |- If no tool call is needed, use final_answer tool to return your answer. 4. Never re-do a tool call that you previously did with the exact same parameters. - Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. + Now Begin! planning: - initial_facts: |- - Below I will present you a task. - - You will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need. - To do so, you will have to read the task and identify things that must be discovered in order to successfully complete it. - Don't make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey: + initial_plan : |- + You are a world expert at analyzing a situation to derive facts, and plan accordingly towards solving a task. + Below I will present you a task. You will need to 1. build a survey of facts known or needed to solve the task, then 2. make a plan of action to solve the task. - --- - ### 1. Facts given in the task + ## 1. Facts survey + You will build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need. + These "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings: + ### 1.1. Facts given in the task List here the specific facts given in the task that could help you (there might be nothing here). - ### 2. Facts to look up + ### 1.2. Facts to look up List here any facts that we may need to look up. Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here. - ### 3. Facts to derive + ### 1.3. 
Facts to derive List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation. - Keep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings: - ### 1. Facts given in the task - ### 2. Facts to look up - ### 3. Facts to derive - Do not add anything else. + Don't make any assumptions. For each item, provide a thorough reasoning. Do not add anything else on top of three headings above. - Here is the task: - ``` - {{task}} - ``` - Now begin! - initial_plan : |- - You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools. - - Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. + ## 2. Plan + Then for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. After writing the final step of the plan, write the '\n' tag and stop there. - Here is your task: - - Task: - ``` - {{task}} - ``` You can leverage these tools: {%- for tool in tools.values() %} - {{ tool.name }}: {{ tool.description }} @@ -174,49 +155,43 @@ planning: {%- for agent in managed_agents.values() %} - {{ agent.name }}: {{ agent.description }} {%- endfor %} - {%- else %} {%- endif %} - List of facts that you know: + --- + Now begin! Here is your task: ``` - {{answer_facts}} + {{task}} ``` - - Now begin! Write your plan below. - update_facts_pre_messages: |- - You are a world expert at gathering known and unknown facts based on a conversation. - Below you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these: - ### 1. 
Facts given in the task - ### 2. Facts that we have learned - ### 3. Facts still to look up - ### 4. Facts still to derive - Find the task and history below: - update_facts_post_messages: |- - Earlier we've built a list of facts. - But since in your previous steps you may have learned useful new facts or invalidated some false ones. - Please update your list of facts based on the previous history, and provide these headings: - ### 1. Facts given in the task - ### 2. Facts that we have learned - ### 3. Facts still to look up - ### 4. Facts still to derive - - Now write your new list of facts below. + First in part 1, write the facts survey, then in part 2, write your plan. update_plan_pre_messages: |- - You are a world expert at making efficient plans to solve any task using a set of carefully crafted tools. - - You have been given a task: + You are a world expert at analyzing a situation, and plan accordingly towards solving a task. + You have been given the following task: ``` {{task}} ``` - - Find below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task. - If the previous tries so far have met some success, you can make an updated plan based on these actions. + + Below you will find a history of attempts made to solve this task. + You will first have to produce a survey of known and unknown facts, then propose a step-by-step high-level plan to solve the task. + If the previous tries so far have met some success, your updated plan can build on these results. If you are stalled, you can make a completely new plan starting from scratch. + + Find the task and history below: update_plan_post_messages: |- - You're still working towards solving this task: - ``` - {{task}} - ``` + Now write your updated facts below, taking into account the above history: + ## 1. Updated facts survey + ### 1.1. Facts given in the task + ### 1.2. Facts that we have learned + ### 1.3. Facts still to look up + ### 1.4. 
Facts still to derive + + Then write a step-by-step high-level plan to solve the task above. + ## 2. Plan + ### 2. 1. ... + Etc. + This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. + Beware that you have {remaining_steps} steps remaining. + Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. + After writing the final step of the plan, write the '\n' tag and stop there. You can leverage these tools: {%- for tool in tools.values() %} @@ -233,20 +208,8 @@ planning: {%- for agent in managed_agents.values() %} - {{ agent.name }}: {{ agent.description }} {%- endfor %} - {%- else %} {%- endif %} - Here is the up to date list of facts that you know: - ``` - {{facts_update}} - ``` - - Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. - This plan should involve individual tasks based on the available tools, that if executed correctly will yield the correct answer. - Beware that you have {remaining_steps} steps remaining. - Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS. - After writing the final step of the plan, write the '\n' tag and stop there. - Now write your new plan below. managed_agent: task: |- diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py new file mode 100644 index 000000000..acfe70020 --- /dev/null +++ b/src/smolagents/remote_executors.py @@ -0,0 +1,387 @@ +#!/usr/bin/env python +# coding=utf-8 + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import json +import pickle +import re +import time +from io import BytesIO +from pathlib import Path +from textwrap import dedent +from typing import Any + +import PIL.Image +import requests + +from .local_python_executor import PythonExecutor +from .monitoring import LogLevel +from .tools import Tool, get_tools_definition_code +from .utils import AgentError + + +try: + from dotenv import load_dotenv + + load_dotenv() +except ModuleNotFoundError: + pass + + +class RemotePythonExecutor(PythonExecutor): + def __init__(self, additional_imports: list[str], logger): + self.additional_imports = additional_imports + self.logger = logger + self.logger.log("Initializing executor, hold on...") + self.final_answer_pattern = re.compile(r"^final_answer\((.*)\)$", re.M) + self.installed_packages = [] + + def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> tuple[Any, str]: + raise NotImplementedError + + def send_tools(self, tools: dict[str, Tool]): + tool_definition_code = get_tools_definition_code(tools) + + packages_to_install = set() + for tool in tools.values(): + for package in tool.to_dict()["requirements"]: + if package not in self.installed_packages: + packages_to_install.add(package) + self.installed_packages.append(package) + + execution = self.run_code_raise_errors( + f"!pip install {' '.join(packages_to_install)}\n" + tool_definition_code + ) + self.logger.log(execution[1]) + + def send_variables(self, variables: dict): + """ + Send variables to the kernel namespace using pickle. 
+ """ + pickled_vars = base64.b64encode(pickle.dumps(variables)).decode() + code = f""" +import pickle, base64 +vars_dict = pickle.loads(base64.b64decode('{pickled_vars}')) +locals().update(vars_dict) +""" + self.run_code_raise_errors(code) + + def __call__(self, code_action: str) -> tuple[Any, str, bool]: + """Check if code is a final answer and run it accordingly""" + is_final_answer = bool(self.final_answer_pattern.search(code_action)) + output = self.run_code_raise_errors(code_action, return_final_answer=is_final_answer) + return output[0], output[1], is_final_answer + + def install_packages(self, additional_imports: list[str]): + additional_imports = additional_imports + ["smolagents"] + _, execution_logs = self.run_code_raise_errors(f"!pip install {' '.join(additional_imports)}") + self.logger.log(execution_logs) + return additional_imports + + +class E2BExecutor(RemotePythonExecutor): + """ + Executes Python code using E2B. + + Args: + additional_imports (`list[str]`): Additional imports to install. + logger (`Logger`): Logger to use. + **kwargs: Additional arguments to pass to the E2B Sandbox. 
+ """ + + def __init__(self, additional_imports: list[str], logger, **kwargs): + super().__init__(additional_imports, logger) + try: + from e2b_code_interpreter import Sandbox + except ModuleNotFoundError: + raise ModuleNotFoundError( + """Please install 'e2b' extra to use E2BExecutor: `pip install 'smolagents[e2b]'`""" + ) + self.sandbox = Sandbox(**kwargs) + self.installed_packages = self.install_packages(additional_imports) + self.logger.log("E2B is running", level=LogLevel.INFO) + + def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> tuple[Any, str]: + execution = self.sandbox.run_code( + code, + ) + if execution.error: + execution_logs = "\n".join([str(log) for log in execution.logs.stdout]) + logs = execution_logs + logs += "Executing code yielded an error:" + logs += execution.error.name + "\n" + logs += execution.error.value + logs += execution.error.traceback + raise AgentError(logs, self.logger) + execution_logs = "\n".join([str(log) for log in execution.logs.stdout]) + if not execution.results: + return None, execution_logs + else: + for result in execution.results: + if result.is_main_result: + for attribute_name in ["jpeg", "png"]: + if getattr(result, attribute_name) is not None: + image_output = getattr(result, attribute_name) + decoded_bytes = base64.b64decode(image_output.encode("utf-8")) + return PIL.Image.open(BytesIO(decoded_bytes)), execution_logs + for attribute_name in [ + "chart", + "data", + "html", + "javascript", + "json", + "latex", + "markdown", + "pdf", + "svg", + "text", + ]: + if getattr(result, attribute_name) is not None: + return getattr(result, attribute_name), execution_logs + if return_final_answer: + raise AgentError("No main result returned by executor!", self.logger) + return None, execution_logs + + +class DockerExecutor(RemotePythonExecutor): + """ + Executes Python code using Jupyter Kernel Gateway in a Docker container. 
+ """ + + def __init__( + self, + additional_imports: list[str], + logger, + host: str = "127.0.0.1", + port: int = 8888, + image_name: str = "jupyter-kernel", + build_new_image: bool = True, + container_run_kwargs: dict[str, Any] | None = None, + ): + """ + Initialize the Docker-based Jupyter Kernel Gateway executor. + + Args: + additional_imports: Additional imports to install. + logger: Logger to use. + host: Host to bind to. + port: Port to bind to. + image_name: Name of the Docker image to use. If the image doesn't exist, it will be built. + build_new_image: If True, the image will be rebuilt even if it already exists. + container_run_kwargs: Additional keyword arguments to pass to the Docker container run command. + """ + super().__init__(additional_imports, logger) + try: + import docker + from websocket import create_connection + except ModuleNotFoundError: + raise ModuleNotFoundError( + "Please install 'docker' extra to use DockerExecutor: `pip install 'smolagents[docker]'`" + ) + self.host = host + self.port = port + self.image_name = image_name + + # Initialize Docker + try: + self.client = docker.from_env() + except docker.errors.DockerException as e: + raise RuntimeError("Could not connect to Docker daemon: make sure Docker is running.") from e + + # Build and start container + try: + # Check if image exists, unless forced to rebuild + if not build_new_image: + try: + self.client.images.get(self.image_name) + self.logger.log(f"Using existing Docker image: {self.image_name}", level=LogLevel.INFO) + except docker.errors.ImageNotFound: + self.logger.log(f"Image {self.image_name} not found, building...", level=LogLevel.INFO) + build_new_image = True + + if build_new_image: + self.logger.log(f"Building Docker image {self.image_name}...", level=LogLevel.INFO) + dockerfile_path = Path(__file__).parent / "Dockerfile" + if not dockerfile_path.exists(): + with open(dockerfile_path, "w") as f: + f.write("""FROM python:3.12-slim + +RUN pip install 
jupyter_kernel_gateway requests numpy pandas +RUN pip install jupyter_client notebook + +EXPOSE 8888 +CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip='0.0.0.0'", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin='*'"] +""") + _, build_logs = self.client.images.build( + path=str(dockerfile_path.parent), dockerfile=str(dockerfile_path), tag=self.image_name + ) + self.logger.log(build_logs, level=LogLevel.DEBUG) + + self.logger.log(f"Starting container on {host}:{port}...", level=LogLevel.INFO) + # Create base container parameters + container_kwargs = {} + if container_run_kwargs: + container_kwargs.update(container_run_kwargs) + + # Ensure required port mapping and background running + if not isinstance(container_kwargs.get("ports"), dict): + container_kwargs["ports"] = {} + container_kwargs["ports"]["8888/tcp"] = (host, port) + container_kwargs["detach"] = True + + self.container = self.client.containers.run(self.image_name, **container_kwargs) + + retries = 0 + while self.container.status != "running" and retries < 5: + self.logger.log(f"Container status: {self.container.status}, waiting...", level=LogLevel.INFO) + time.sleep(1) + self.container.reload() + retries += 1 + + self.base_url = f"http://{host}:{port}" + + # Create new kernel via HTTP + r = requests.post(f"{self.base_url}/api/kernels") + if r.status_code != 201: + error_details = { + "status_code": r.status_code, + "headers": dict(r.headers), + "url": r.url, + "body": r.text, + "request_method": r.request.method, + "request_headers": dict(r.request.headers), + "request_body": r.request.body, + } + self.logger.log_error(f"Failed to create kernel. 
Details: {json.dumps(error_details, indent=2)}") + raise RuntimeError(f"Failed to create kernel: Status {r.status_code}\nResponse: {r.text}") from None + + self.kernel_id = r.json()["id"] + + ws_url = f"ws://{host}:{port}/api/kernels/{self.kernel_id}/channels" + self.ws = create_connection(ws_url) + + self.installed_packages = self.install_packages(additional_imports) + self.logger.log( + f"Container {self.container.short_id} is running with kernel {self.kernel_id}", level=LogLevel.INFO + ) + + except Exception as e: + self.cleanup() + raise RuntimeError(f"Failed to initialize Jupyter kernel: {e}") from e + + def run_code_raise_errors(self, code_action: str, return_final_answer: bool = False) -> tuple[Any, str]: + """ + Execute code and return result based on whether it's a final answer. + """ + try: + if return_final_answer: + match = self.final_answer_pattern.search(code_action) + if match: + pre_final_answer_code = self.final_answer_pattern.sub("", code_action) + result_expr = match.group(1) + wrapped_code = pre_final_answer_code + dedent(f""" + import pickle, base64 + _result = {result_expr} + print("RESULT_PICKLE:" + base64.b64encode(pickle.dumps(_result)).decode()) + """) + else: + wrapped_code = code_action + + # Send execute request + msg_id = self._send_execute_request(wrapped_code) + + # Collect output and results + outputs = [] + result = None + waiting_for_idle = False + + while True: + msg = json.loads(self.ws.recv()) + msg_type = msg.get("msg_type", "") + parent_msg_id = msg.get("parent_header", {}).get("msg_id") + + # Only process messages related to our execute request + if parent_msg_id != msg_id: + continue + + if msg_type == "stream": + text = msg["content"]["text"] + if return_final_answer and text.startswith("RESULT_PICKLE:"): + pickle_data = text[len("RESULT_PICKLE:") :].strip() + result = pickle.loads(base64.b64decode(pickle_data)) + waiting_for_idle = True + else: + outputs.append(text) + elif msg_type == "error": + traceback = 
msg["content"].get("traceback", []) + raise AgentError("\n".join(traceback), self.logger) + elif msg_type == "status" and msg["content"]["execution_state"] == "idle": + if not return_final_answer or waiting_for_idle: + break + + return result, "".join(outputs) + + except Exception as e: + self.logger.log_error(f"Code execution failed: {e}") + raise + + def _send_execute_request(self, code: str) -> str: + """Send code execution request to kernel.""" + import uuid + + # Generate a unique message ID + msg_id = str(uuid.uuid4()) + + # Create execute request + execute_request = { + "header": { + "msg_id": msg_id, + "username": "anonymous", + "session": str(uuid.uuid4()), + "msg_type": "execute_request", + "version": "5.0", + }, + "parent_header": {}, + "metadata": {}, + "content": { + "code": code, + "silent": False, + "store_history": True, + "user_expressions": {}, + "allow_stdin": False, + }, + } + + self.ws.send(json.dumps(execute_request)) + return msg_id + + def cleanup(self): + """Clean up resources.""" + try: + if hasattr(self, "container"): + self.logger.log(f"Stopping and removing container {self.container.short_id}...", level=LogLevel.INFO) + self.container.stop() + self.container.remove() + self.logger.log("Container cleanup completed", level=LogLevel.INFO) + except Exception as e: + self.logger.log_error(f"Error during cleanup: {e}") + + def delete(self): + """Ensure cleanup on deletion.""" + self.cleanup() + + +__all__ = ["E2BExecutor", "DockerExecutor"] diff --git a/src/smolagents/tool_validation.py b/src/smolagents/tool_validation.py index 125e68993..3b8a3fdca 100644 --- a/src/smolagents/tool_validation.py +++ b/src/smolagents/tool_validation.py @@ -1,9 +1,8 @@ import ast import builtins from itertools import zip_longest -from typing import Set -from .utils import BASE_BUILTIN_MODULES, get_source +from .utils import BASE_BUILTIN_MODULES, get_source, is_valid_name _BUILTIN_NAMES = set(vars(builtins)) @@ -16,7 +15,7 @@ class MethodChecker(ast.NodeVisitor): 
- contains no local imports (e.g. numpy is ok but local_script is not) """ - def __init__(self, class_attributes: Set[str], check_imports: bool = True): + def __init__(self, class_attributes: set[str], check_imports: bool = True): self.undefined_names = set() self.imports = {} self.from_imports = {} @@ -50,6 +49,10 @@ def visit_Assign(self, node): for target in node.targets: if isinstance(target, ast.Name): self.assigned_names.add(target.id) + elif isinstance(target, (ast.Tuple, ast.List)): + for elt in target.elts: + if isinstance(elt, ast.Name): + self.assigned_names.add(elt.id) self.visit(node.value) def visit_With(self, node): @@ -166,6 +169,7 @@ def __init__(self): self.non_defaults = set() self.non_literal_defaults = set() self.in_method = False + self.invalid_attributes = [] def visit_FunctionDef(self, node): if node.name == "__init__": @@ -192,6 +196,19 @@ def visit_Assign(self, node): if isinstance(target, ast.Name): self.complex_attributes.add(target.id) + # Check specific class attributes + if getattr(node.targets[0], "id", "") == "name": + if not isinstance(node.value, ast.Constant): + self.invalid_attributes.append(f"Class attribute 'name' must be a constant, found '{node.value}'") + elif not isinstance(node.value.value, str): + self.invalid_attributes.append( + f"Class attribute 'name' must be a string, found '{node.value.value}'" + ) + elif not is_valid_name(node.value.value): + self.invalid_attributes.append( + f"Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found '{node.value.value}'" + ) + def _check_init_function_parameters(self, node): # Check defaults in parameters for arg, default in reversed(list(zip_longest(reversed(node.args.args), reversed(node.args.defaults)))): @@ -210,6 +227,9 @@ def _check_init_function_parameters(self, node): class_level_checker.visit(class_node) errors = [] + # Check invalid class attributes + if class_level_checker.invalid_attributes: + errors += 
class_level_checker.invalid_attributes if class_level_checker.complex_attributes: errors.append( f"Complex attributes should be defined in __init__, not as class attributes: " diff --git a/src/smolagents/tools.py b/src/smolagents/tools.py index 3f8b25a26..35622c090 100644 --- a/src/smolagents/tools.py +++ b/src/smolagents/tools.py @@ -14,6 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import ast import inspect import json @@ -23,19 +25,20 @@ import tempfile import textwrap import types +from collections.abc import Callable from contextlib import contextmanager from functools import wraps from pathlib import Path -from typing import Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any from huggingface_hub import ( + CommitOperationAdd, + create_commit, create_repo, get_collection, hf_hub_download, metadata_update, - upload_folder, ) -from huggingface_hub.utils import is_torch_available from ._function_type_hints_utils import ( TypeHintParsingException, @@ -45,7 +48,11 @@ ) from .agent_types import handle_agent_input_types, handle_agent_output_types from .tool_validation import MethodChecker, validate_tool_attributes -from .utils import _is_package_available, _is_pillow_available, get_source, instance_to_source +from .utils import BASE_BUILTIN_MODULES, _is_package_available, get_source, instance_to_source, is_valid_name + + +if TYPE_CHECKING: + import mcp logger = logging.getLogger(__name__) @@ -89,7 +96,7 @@ class Tool: returns the text contained in the file'. - **name** (`str`) -- A performative name that will be used for your tool in the prompt to the agent. For instance `"text-classifier"` or `"image_generator"`. - - **inputs** (`Dict[str, Dict[str, Union[str, type]]]`) -- The dict of modalities expected for the inputs. 
+ - **inputs** (`Dict[str, Dict[str, Union[str, type, bool]]]`) -- The dict of modalities expected for the inputs. It has one `type`key and a `description`key. This is used by `launch_gradio_demo` or to make a nice space from your tool, and also can be used in the generated description for your tool. @@ -103,7 +110,7 @@ class Tool: name: str description: str - inputs: Dict[str, Dict[str, Union[str, type, bool]]] + inputs: dict[str, dict[str, str | type | bool]] output_type: str def __init__(self, *args, **kwargs): @@ -120,7 +127,7 @@ def validate_arguments(self): "inputs": dict, "output_type": str, } - + # Validate class attributes for attr, expected_type in required_attributes.items(): attr_value = getattr(self, attr, None) if attr_value is None: @@ -129,6 +136,12 @@ def validate_arguments(self): raise TypeError( f"Attribute {attr} should have type {expected_type.__name__}, got {type(attr_value)} instead." ) + # - Validate name + if not is_valid_name(self.name): + raise Exception( + f"Invalid Tool name '{self.name}': must be a valid Python identifier and not a reserved keyword" + ) + # Validate inputs for input_name, input_content in self.inputs.items(): assert isinstance(input_content, dict), f"Input '{input_name}' should be a dictionary." assert "type" in input_content and "description" in input_content, ( @@ -138,7 +151,7 @@ def validate_arguments(self): raise Exception( f"Input '{input_name}': type '{input_content['type']}' is not an authorized value, should be one of {AUTHORIZED_TYPES}." 
) - + # Validate output type assert getattr(self, "output_type", None) in AUTHORIZED_TYPES # Validate forward function signature, except for Tools that use a "generic" signature (PipelineTool, SpaceToolWrapper, LangChainToolWrapper) @@ -147,10 +160,12 @@ def validate_arguments(self): and getattr(self, "skip_forward_signature_validation") is True ): signature = inspect.signature(self.forward) - - if not set(signature.parameters.keys()) == set(self.inputs.keys()): + actual_keys = set(key for key in signature.parameters.keys() if key != "self") + expected_keys = set(self.inputs.keys()) + if actual_keys != expected_keys: raise Exception( - "Tool's 'forward' method should take 'self' as its first argument, then its next arguments should match the keys of tool attribute 'inputs'." + f"In tool '{self.name}', 'forward' method parameters were {actual_keys}, but expected {expected_keys}. " + f"It should take 'self' as its first argument, then its next arguments should match the keys of tool attribute 'inputs'." 
) json_schema = _convert_type_hints_to_json_schema(self.forward, error_on_missing_type_hints=False)[ @@ -211,7 +226,8 @@ def to_dict(self) -> dict: method_checker.visit(forward_node) if len(method_checker.errors) > 0: - raise (ValueError("\n".join(method_checker.errors))) + errors = [f"- {error}" for error in method_checker.errors] + raise (ValueError(f"SimpleTool validation failed for {self.name}:\n" + "\n".join(errors))) forward_source_code = get_source(self.forward) tool_code = textwrap.dedent( @@ -222,7 +238,7 @@ def to_dict(self) -> dict: class {class_name}(Tool): name = "{self.name}" description = {json.dumps(textwrap.dedent(self.description).strip())} - inputs = {json.dumps(self.inputs, separators=(",", ":"))} + inputs = {repr(self.inputs)} output_type = "{self.output_type}" """ ).strip() @@ -261,9 +277,25 @@ def replacement(match): requirements = {el for el in get_imports(tool_code) if el not in sys.stdlib_module_names} | {"smolagents"} - return {"name": self.name, "code": tool_code, "requirements": requirements} + return {"name": self.name, "code": tool_code, "requirements": sorted(requirements)} + + @classmethod + def from_dict(cls, tool_dict: dict[str, Any], **kwargs) -> "Tool": + """ + Create tool from a dictionary representation. + + Args: + tool_dict (`dict[str, Any]`): Dictionary representation of the tool. + **kwargs: Additional keyword arguments to pass to the tool's constructor. - def save(self, output_dir: str, tool_file_name: str = "tool", make_gradio_app: bool = True): + Returns: + `Tool`: Tool object. + """ + if "code" not in tool_dict: + raise ValueError("Tool dictionary must contain 'code' key with the tool source code") + return cls.from_code(tool_dict["code"], **kwargs) + + def save(self, output_dir: str | Path, tool_file_name: str = "tool", make_gradio_app: bool = True): """ Saves the relevant code files for your tool so it can be pushed to the Hub. 
This will copy the code of your tool in `output_dir` as well as autogenerate: @@ -275,48 +307,31 @@ def save(self, output_dir: str, tool_file_name: str = "tool", make_gradio_app: b code) Args: - output_dir (`str`): The folder in which you want to save your tool. + output_dir (`str` or `Path`): The folder in which you want to save your tool. tool_file_name (`str`, *optional*): The file name in which you want to save your tool. make_gradio_app (`bool`, *optional*, defaults to True): Whether to also export a `requirements.txt` file and Gradio UI. """ - os.makedirs(output_dir, exist_ok=True) - class_name = self.__class__.__name__ - tool_file = os.path.join(output_dir, f"{tool_file_name}.py") - - tool_dict = self.to_dict() - tool_code = tool_dict["code"] - - with open(tool_file, "w", encoding="utf-8") as f: - f.write(tool_code.replace(":true,", ":True,").replace(":true}", ":True}")) - + # Ensure output directory exists + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + # Save tool file + self._write_file(output_path / f"{tool_file_name}.py", self._get_tool_code()) if make_gradio_app: - # Save app file - app_file = os.path.join(output_dir, "app.py") - with open(app_file, "w", encoding="utf-8") as f: - f.write( - textwrap.dedent( - f""" - from smolagents import launch_gradio_demo - from {tool_file_name} import {class_name} - - tool = {class_name}() - - launch_gradio_demo(tool) - """ - ).lstrip() - ) - + # Save app file + self._write_file(output_path / "app.py", self._get_gradio_app_code(tool_module_name=tool_file_name)) # Save requirements file - requirements_file = os.path.join(output_dir, "requirements.txt") - with open(requirements_file, "w", encoding="utf-8") as f: - f.write("\n".join(tool_dict["requirements"]) + "\n") + self._write_file(output_path / "requirements.txt", self._get_requirements()) + + def _write_file(self, file_path: Path, content: str) -> None: + """Writes content to a file with UTF-8 encoding.""" + 
file_path.write_text(content, encoding="utf-8") def push_to_hub( self, repo_id: str, commit_message: str = "Upload tool", - private: Optional[bool] = None, - token: Optional[Union[bool, str]] = None, + private: bool | None = None, + token: bool | str | None = None, create_pr: bool = False, ) -> str: """ @@ -334,8 +349,25 @@ def push_to_hub( The token to use as HTTP bearer authorization for remote files. If unset, will use the token generated when running `huggingface-cli login` (stored in `~/.huggingface`). create_pr (`bool`, *optional*, defaults to `False`): - Whether or not to create a PR with the uploaded files or directly commit. + Whether to create a PR with the uploaded files or directly commit. """ + # Initialize repository + repo_id = self._initialize_hub_repo(repo_id, token, private) + # Prepare files for commit + additions = self._prepare_hub_files() + # Create commit + return create_commit( + repo_id=repo_id, + operations=additions, + commit_message=commit_message, + token=token, + create_pr=create_pr, + repo_type="space", + ) + + @staticmethod + def _initialize_hub_repo(repo_id: str, token: bool | str | None, private: bool | None) -> str: + """Initialize repository on Hugging Face Hub.""" repo_url = create_repo( repo_id=repo_id, token=token, @@ -344,27 +376,56 @@ def push_to_hub( repo_type="space", space_sdk="gradio", ) - repo_id = repo_url.repo_id - metadata_update(repo_id, {"tags": ["smolagents", "tool"]}, repo_type="space", token=token) - - with tempfile.TemporaryDirectory() as work_dir: - # Save all files. 
- self.save(work_dir) - logger.info(f"Uploading the following files to {repo_id}: {','.join(os.listdir(work_dir))}") - return upload_folder( - repo_id=repo_id, - commit_message=commit_message, - folder_path=work_dir, - token=token, - create_pr=create_pr, - repo_type="space", - ) + metadata_update(repo_url.repo_id, {"tags": ["smolagents", "tool"]}, repo_type="space", token=token) + return repo_url.repo_id + + def _prepare_hub_files(self) -> list: + """Prepare files for Hub commit.""" + additions = [ + # Add tool code + CommitOperationAdd( + path_in_repo="tool.py", + path_or_fileobj=self._get_tool_code().encode(), + ), + # Add Gradio app + CommitOperationAdd( + path_in_repo="app.py", + path_or_fileobj=self._get_gradio_app_code().encode(), + ), + # Add requirements + CommitOperationAdd( + path_in_repo="requirements.txt", + path_or_fileobj=self._get_requirements().encode(), + ), + ] + return additions + + def _get_tool_code(self) -> str: + """Get the tool's code.""" + return self.to_dict()["code"] + + def _get_gradio_app_code(self, tool_module_name: str = "tool") -> str: + """Get the Gradio app code.""" + class_name = self.__class__.__name__ + return textwrap.dedent( + f"""\ + from smolagents import launch_gradio_demo + from {tool_module_name} import {class_name} + + tool = {class_name}() + launch_gradio_demo(tool) + """ + ) + + def _get_requirements(self) -> str: + """Get the requirements.""" + return "\n".join(self.to_dict()["requirements"]) @classmethod def from_hub( cls, repo_id: str, - token: Optional[str] = None, + token: str | None = None, trust_remote_code: bool = False, **kwargs, ): @@ -381,7 +442,7 @@ def from_hub( Args: repo_id (`str`): - The name of the repo on the Hub where your tool is defined. + The name of the Space repo on the Hub where your tool is defined. token (`str`, *optional*): The token to identify you on hf.co. If unset, will use the token generated when running `huggingface-cli login` (stored in `~/.huggingface`). 
@@ -444,8 +505,8 @@ def from_space( space_id: str, name: str, description: str, - api_name: Optional[str] = None, - token: Optional[str] = None, + api_name: str | None = None, + token: str | None = None, ): """ Creates a [`Tool`] from a Space given its id on the Hub. @@ -493,8 +554,8 @@ def __init__( space_id: str, name: str, description: str, - api_name: Optional[str] = None, - token: Optional[str] = None, + api_name: str | None = None, + token: str | None = None, ): self.name = name self.description = description @@ -535,11 +596,9 @@ def __init__( def sanitize_argument_for_prediction(self, arg): from gradio_client.utils import is_http_url_like + from PIL.Image import Image - if _is_pillow_available(): - from PIL.Image import Image - - if _is_pillow_available() and isinstance(arg, Image): + if isinstance(arg, Image): temp_file = tempfile.NamedTemporaryFile(suffix=".png", delete=False) arg.save(temp_file.name) arg = temp_file.name @@ -641,6 +700,7 @@ def launch_gradio_demo(tool: Tool): raise ImportError("Gradio should be installed in order to launch a gradio demo.") TYPE_TO_COMPONENT_CLASS_MAPPING = { + "boolean": gr.Checkbox, "image": gr.Image, "audio": gr.Audio, "string": gr.Textbox, @@ -659,8 +719,8 @@ def tool_forward(*args, **kwargs): new_component = input_gradio_component_class(label=input_name) gradio_inputs.append(new_component) - output_gradio_componentclass = TYPE_TO_COMPONENT_CLASS_MAPPING[tool.output_type] - gradio_output = output_gradio_componentclass(label="Output") + output_gradio_component_class = TYPE_TO_COMPONENT_CLASS_MAPPING[tool.output_type] + gradio_output = output_gradio_component_class(label="Output") gr.Interface( fn=tool_forward, @@ -674,8 +734,8 @@ def tool_forward(*args, **kwargs): def load_tool( repo_id, - model_repo_id: Optional[str] = None, - token: Optional[str] = None, + model_repo_id: str | None = None, + token: str | None = None, trust_remote_code: bool = False, **kwargs, ): @@ -692,7 +752,7 @@ def load_tool( Args: repo_id 
(`str`): - Repo ID of a tool on the Hub. + Space repo ID of a tool on the Hub. model_repo_id (`str`, *optional*): Use this argument to use a different model than the default one for the tool you selected. token (`str`, *optional*): @@ -738,14 +798,14 @@ class ToolCollection: For example and usage, see: [`ToolCollection.from_hub`] and [`ToolCollection.from_mcp`] """ - def __init__(self, tools: List[Tool]): + def __init__(self, tools: list[Tool]): self.tools = tools @classmethod def from_hub( cls, collection_slug: str, - token: Optional[str] = None, + token: str | None = None, trust_remote_code: bool = False, ) -> "ToolCollection": """Loads a tool collection from the Hub. @@ -783,20 +843,32 @@ def from_hub( @classmethod @contextmanager - def from_mcp(cls, server_parameters) -> "ToolCollection": + def from_mcp( + cls, server_parameters: "mcp.StdioServerParameters" | dict, trust_remote_code: bool = False + ) -> "ToolCollection": """Automatically load a tool collection from an MCP server. + This method supports both SSE and Stdio MCP servers. Look at the `server_parameters` + argument for more details on how to connect to an SSE or Stdio MCP server. + Note: a separate thread will be spawned to run an asyncio event loop handling the MCP server. Args: - server_parameters (mcp.StdioServerParameters): The server parameters to use to - connect to the MCP server. + server_parameters (`mcp.StdioServerParameters` or `dict`): + The server parameters to use to connect to the MCP server. If a dict is + provided, it is assumed to be the parameters of `mcp.client.sse.sse_client`. + trust_remote_code (`bool`, *optional*, defaults to `False`): + Whether to trust the execution of code from tools defined on the MCP server. + This option should only be set to `True` if you trust the MCP server, + and understand the risks associated with running remote code on your local machine. + If set to `False`, loading tools from MCP will fail. + Returns: ToolCollection: A tool collection instance. 
- Example: + Example with a Stdio MCP server: ```py >>> from smolagents import ToolCollection, CodeAgent >>> from mcp import StdioServerParameters @@ -807,11 +879,23 @@ def from_mcp(cls, server_parameters) -> "ToolCollection": >>> env={"UV_PYTHON": "3.12", **os.environ}, >>> ) - >>> with ToolCollection.from_mcp(server_parameters) as tool_collection: + >>> with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection: + >>> agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True) + >>> agent.run("Please find a remedy for hangover.") + ``` + + Example with an SSE MCP server: + ```py + >>> with ToolCollection.from_mcp({"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True) as tool_collection: >>> agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True) >>> agent.run("Please find a remedy for hangover.") ``` """ + if not trust_remote_code: + raise ValueError( + "Loading tools from MCP requires you to acknowledge you trust the MCP server, " + "as it will execute code on your local machine: pass `trust_remote_code=True`." + ) try: from mcpadapt.core import MCPAdapt from mcpadapt.smolagents_adapter import SmolAgentsAdapter @@ -826,45 +910,73 @@ def from_mcp(cls, server_parameters) -> "ToolCollection": def tool(tool_function: Callable) -> Tool: """ - Converts a function into an instance of a Tool subclass. + Convert a function into an instance of a dynamically created Tool subclass. Args: - tool_function: Your function. Should have type hints for each input and a type hint for the output. - Should also have a docstring description including an 'Args:' part where each argument is described. + tool_function (`Callable`): Function to convert into a Tool subclass. + Should have type hints for each input and a type hint for the output. + Should also have a docstring including the description of the function + and an 'Args:' part where each argument is described. 
""" tool_json_schema = get_json_schema(tool_function)["function"] if "return" not in tool_json_schema: raise TypeHintParsingException("Tool return type not found: make sure your function has a return type hint!") class SimpleTool(Tool): - def __init__( - self, - name: str, - description: str, - inputs: Dict[str, Dict[str, str]], - output_type: str, - function: Callable, - ): - self.name = name - self.description = description - self.inputs = inputs - self.output_type = output_type - self.forward = function + def __init__(self): self.is_initialized = True - simple_tool = SimpleTool( - name=tool_json_schema["name"], - description=tool_json_schema["description"], - inputs=tool_json_schema["parameters"]["properties"], - output_type=tool_json_schema["return"]["type"], - function=tool_function, + # Set the class attributes + SimpleTool.name = tool_json_schema["name"] + SimpleTool.description = tool_json_schema["description"] + SimpleTool.inputs = tool_json_schema["parameters"]["properties"] + SimpleTool.output_type = tool_json_schema["return"]["type"] + + @wraps(tool_function) + def wrapped_function(*args, **kwargs): + return tool_function(*args, **kwargs) + + # Bind the copied function to the forward method + SimpleTool.forward = staticmethod(wrapped_function) + + # Get the signature parameters of the tool function + sig = inspect.signature(tool_function) + # - Add "self" as first parameter to tool_function signature + new_sig = sig.replace( + parameters=[inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD)] + list(sig.parameters.values()) ) - original_signature = inspect.signature(tool_function) - new_parameters = [inspect.Parameter("self", inspect.Parameter.POSITIONAL_ONLY)] + list( - original_signature.parameters.values() + # - Set the signature of the forward method + SimpleTool.forward.__signature__ = new_sig + + # Create and attach the source code of the dynamically created tool class and forward method + # - Get the source code of tool_function + 
tool_source = inspect.getsource(tool_function) + # - Remove the tool decorator and function definition line + tool_source_body = "\n".join(tool_source.split("\n")[2:]) + # - Dedent + tool_source_body = textwrap.dedent(tool_source_body) + # - Create the forward method source, including def line and indentation + forward_method_source = f"def forward{str(new_sig)}:\n{textwrap.indent(tool_source_body, ' ')}" + # - Create the class source + class_source = ( + textwrap.dedent(f""" + class SimpleTool(Tool): + name: str = "{tool_json_schema["name"]}" + description: str = {json.dumps(textwrap.dedent(tool_json_schema["description"]).strip())} + inputs: dict[str, dict[str, str]] = {tool_json_schema["parameters"]["properties"]} + output_type: str = "{tool_json_schema["return"]["type"]}" + + def __init__(self): + self.is_initialized = True + + """) + + textwrap.indent(forward_method_source, " ") # indent for class method ) - new_signature = original_signature.replace(parameters=new_parameters) - simple_tool.forward.__signature__ = new_signature + # - Store the source code on both class and method for inspection + SimpleTool.__source__ = class_source + SimpleTool.forward.__source__ = forward_method_source + + simple_tool = SimpleTool() return simple_tool @@ -927,7 +1039,7 @@ def __init__( token=None, **hub_kwargs, ): - if not is_torch_available() or not _is_package_available("accelerate"): + if not _is_package_available("accelerate") or not _is_package_available("torch"): raise ModuleNotFoundError( "Please install 'transformers' extra to use a PipelineTool: `pip install 'smolagents[transformers]'`" ) @@ -1009,15 +1121,15 @@ def decode(self, outputs): """ return self.post_processor(outputs) - def __call__(self, *args, **kwargs): + def __call__(self, *args, sanitize_inputs_outputs: bool = False, **kwargs): import torch from accelerate.utils import send_to_device - args, kwargs = handle_agent_input_types(*args, **kwargs) - if not self.is_initialized: self.setup() + if 
sanitize_inputs_outputs: + args, kwargs = handle_agent_input_types(*args, **kwargs) encoded_inputs = self.encode(*args, **kwargs) tensor_inputs = {k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)} @@ -1027,8 +1139,35 @@ def __call__(self, *args, **kwargs): outputs = self.forward({**encoded_inputs, **non_tensor_inputs}) outputs = send_to_device(outputs, "cpu") decoded_outputs = self.decode(outputs) + if sanitize_inputs_outputs: + decoded_outputs = handle_agent_output_types(decoded_outputs, self.output_type) + return decoded_outputs + + +def get_tools_definition_code(tools: dict[str, Tool]) -> str: + tool_codes = [] + for tool in tools.values(): + validate_tool_attributes(tool.__class__, check_imports=False) + tool_code = instance_to_source(tool, base_cls=Tool) + tool_code = tool_code.replace("from smolagents.tools import Tool", "") + tool_code += f"\n\n{tool.name} = {tool.__class__.__name__}()\n" + tool_codes.append(tool_code) + + tool_definition_code = "\n".join([f"import {module}" for module in BASE_BUILTIN_MODULES]) + tool_definition_code += textwrap.dedent( + """ + from typing import Any + + class Tool: + def __call__(self, *args, **kwargs): + return self.forward(*args, **kwargs) - return handle_agent_output_types(decoded_outputs, self.output_type) + def forward(self, *args, **kwargs): + pass # to be implemented in child class + """ + ) + tool_definition_code += "\n\n".join(tool_codes) + return tool_definition_code __all__ = [ diff --git a/src/smolagents/utils.py b/src/smolagents/utils.py index 3f7219b61..49b212dd7 100644 --- a/src/smolagents/utils.py +++ b/src/smolagents/utils.py @@ -20,13 +20,15 @@ import importlib.util import inspect import json +import keyword import os import re -import textwrap import types from functools import lru_cache from io import BytesIO -from typing import TYPE_CHECKING, Any, Dict, Tuple, Union +from pathlib import Path +from textwrap import dedent +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: @@ 
-45,11 +47,6 @@ def _is_package_available(package_name: str) -> bool: return False -@lru_cache -def _is_pillow_available(): - return importlib.util.find_spec("PIL") is not None - - BASE_BUILTIN_MODULES = [ "collections", "datetime", @@ -65,15 +62,28 @@ def _is_pillow_available(): ] +def escape_code_brackets(text: str) -> str: + """Escapes square brackets in code segments while preserving Rich styling tags.""" + + def replace_bracketed_content(match): + content = match.group(1) + cleaned = re.sub( + r"bold|red|green|blue|yellow|magenta|cyan|white|black|italic|dim|\s|#[0-9a-fA-F]{6}", "", content + ) + return f"\\[{content}\\]" if cleaned.strip() else f"[{content}]" + + return re.sub(r"\[([^\]]*)\]", replace_bracketed_content, text) + + class AgentError(Exception): """Base class for other agent-related exceptions""" def __init__(self, message, logger: "AgentLogger"): super().__init__(message) self.message = message - logger.log(f"[bold red]{message}[/bold red]", level="ERROR") + logger.log_error(message) - def dict(self) -> Dict[str, str]: + def dict(self) -> dict[str, str]: return {"type": self.__class__.__name__, "message": str(self.message)} @@ -95,6 +105,18 @@ class AgentMaxStepsError(AgentError): pass +class AgentToolCallError(AgentExecutionError): + """Exception raised for errors when incorrect arguments are passed to the tool""" + + pass + + +class AgentToolExecutionError(AgentExecutionError): + """Exception raised for errors when executing a tool""" + + pass + + class AgentGenerationError(AgentError): """Exception raised for errors in generation in the agent""" @@ -127,13 +149,16 @@ def make_json_serializable(obj: Any) -> Any: return str(obj) -def parse_json_blob(json_blob: str) -> Dict[str, str]: +def parse_json_blob(json_blob: str) -> tuple[dict[str, str], str]: + "Extracts the JSON blob from the input and returns the JSON data and the rest of the input." 
try: first_accolade_index = json_blob.find("{") last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1] - json_blob = json_blob[first_accolade_index : last_accolade_index + 1].replace('\\"', "'") - json_data = json.loads(json_blob, strict=False) - return json_data + json_data = json_blob[first_accolade_index : last_accolade_index + 1] + json_data = json.loads(json_data, strict=False) + return json_data, json_blob[:first_accolade_index] + except IndexError: + raise ValueError("The model output does not contain any JSON blob.") except json.JSONDecodeError as e: place = e.pos if json_blob[place - 1 : place + 2] == "},\n": @@ -145,70 +170,63 @@ def parse_json_blob(json_blob: str) -> Dict[str, str]: f"JSON blob was: {json_blob}, decoding failed on that specific part of the blob:\n" f"'{json_blob[place - 4 : place + 5]}'." ) - except Exception as e: - raise ValueError(f"Error in parsing the JSON blob: {e}") - - -def parse_code_blobs(code_blob: str) -> str: - """Parses the LLM's output to get any code blob inside. Will return the code directly if it's code.""" - pattern = r"```(?:py|python)?\n(.*?)\n```" - matches = re.findall(pattern, code_blob, re.DOTALL) - if len(matches) == 0: - try: # Maybe the LLM outputted a code blob directly - ast.parse(code_blob) - return code_blob - except SyntaxError: - pass - - if "final" in code_blob and "answer" in code_blob: - raise ValueError( - f""" -Your code snippet is invalid, because the regex pattern {pattern} was not found in it. -Here is your code snippet: -{code_blob} -It seems like you're trying to return the final answer, you can do it as follows: -Code: -```py -final_answer("YOUR FINAL ANSWER HERE") -```""".strip() - ) + + +def parse_code_blobs(text: str) -> str: + """Extract code blocs from the LLM's output. + + If a valid code block is passed, it returns it directly. + + Args: + text (`str`): LLM's output text to parse. + + Returns: + `str`: Extracted code block. 
+ + Raises: + ValueError: If no valid code block is found in the text. + """ + pattern = r"```(?:py|python)?\s*\n(.*?)\n```" + matches = re.findall(pattern, text, re.DOTALL) + if matches: + return "\n\n".join(match.strip() for match in matches) + # Maybe the LLM outputted a code blob directly + try: + ast.parse(text) + return text + except SyntaxError: + pass + + if "final" in text and "answer" in text: raise ValueError( - f""" -Your code snippet is invalid, because the regex pattern {pattern} was not found in it. -Here is your code snippet: -{code_blob} -Make sure to include code with the correct pattern, for instance: -Thoughts: Your thoughts -Code: -```py -# Your python code here -```""".strip() + dedent( + f""" + Your code snippet is invalid, because the regex pattern {pattern} was not found in it. + Here is your code snippet: + {text} + It seems like you're trying to return the final answer, you can do it as follows: + Code: + ```py + final_answer("YOUR FINAL ANSWER HERE") + ``` + """ + ).strip() ) - return "\n\n".join(match.strip() for match in matches) - - -def parse_json_tool_call(json_blob: str) -> Tuple[str, Union[str, None]]: - json_blob = json_blob.replace("```json", "").replace("```", "") - tool_call = parse_json_blob(json_blob) - tool_name_key, tool_arguments_key = None, None - for possible_tool_name_key in ["action", "tool_name", "tool", "name", "function"]: - if possible_tool_name_key in tool_call: - tool_name_key = possible_tool_name_key - for possible_tool_arguments_key in [ - "action_input", - "tool_arguments", - "tool_args", - "parameters", - ]: - if possible_tool_arguments_key in tool_call: - tool_arguments_key = possible_tool_arguments_key - if tool_name_key is not None: - if tool_arguments_key is not None: - return tool_call[tool_name_key], tool_call[tool_arguments_key] - else: - return tool_call[tool_name_key], None - error_msg = "No tool name key found in tool call!" 
+ f" Tool call: {json_blob}" - raise AgentParsingError(error_msg) + raise ValueError( + dedent( + f""" + Your code snippet is invalid, because the regex pattern {pattern} was not found in it. + Here is your code snippet: + {text} + Make sure to include code with the correct pattern, for instance: + Thoughts: Your thoughts + Code: + ```py + # Your python code here + ``` + """ + ).strip() + ) MAX_LENGTH_TRUNCATE_CONTENT = 20000 @@ -316,8 +334,14 @@ def instance_to_source(instance, base_cls=None): name: func for name, func in cls.__dict__.items() if callable(func) - and not ( - base_cls and hasattr(base_cls, name) and getattr(base_cls, name).__code__.co_code == func.__code__.co_code + and ( + not base_cls + or not hasattr(base_cls, name) + or ( + isinstance(func, staticmethod) + or isinstance(func, classmethod) + or (getattr(base_cls, name).__code__.co_code != func.__code__.co_code) + ) ) } @@ -382,7 +406,9 @@ def get_source(obj) -> str: inspect_error = None try: - return textwrap.dedent(inspect.getsource(obj)).strip() + # Handle dynamically created classes + source = getattr(obj, "__source__", None) or inspect.getsource(obj) + return dedent(source).strip() except OSError as e: # let's keep track of the exception to raise it if all further methods fail inspect_error = e @@ -399,7 +425,7 @@ def get_source(obj) -> str: tree = ast.parse(all_cells) for node in ast.walk(tree): if isinstance(node, (ast.ClassDef, ast.FunctionDef)) and node.name == obj.__name__: - return textwrap.dedent("\n".join(all_cells.split("\n")[node.lineno - 1 : node.end_lineno])).strip() + return dedent("\n".join(all_cells.split("\n")[node.lineno - 1 : node.end_lineno])).strip() raise ValueError(f"Could not find source code for {obj.__name__} in IPython history") except ImportError: # IPython is not available, let's just raise the original inspect error @@ -419,8 +445,12 @@ def make_image_url(base64_image): return f"data:image/png;base64,{base64_image}" -def make_init_file(folder: str): +def 
make_init_file(folder: str | Path): os.makedirs(folder, exist_ok=True) # Create __init__ with open(os.path.join(folder, "__init__.py"), "w"): pass + + +def is_valid_name(name: str) -> bool: + return name.isidentifier() and not keyword.iskeyword(name) if isinstance(name, str) else False diff --git a/src/smolagents/vision_web_browser.py b/src/smolagents/vision_web_browser.py index 46a07f99d..8886ec97e 100644 --- a/src/smolagents/vision_web_browser.py +++ b/src/smolagents/vision_web_browser.py @@ -3,8 +3,8 @@ from time import sleep import helium +import PIL.Image from dotenv import load_dotenv -from PIL import Image from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys @@ -37,7 +37,7 @@ def parse_arguments(): "--model-type", type=str, default="LiteLLMModel", - help="The model type to use (e.g., OpenAIServerModel, LiteLLMModel, TransformersModel, HfApiModel)", + help="The model type to use (e.g., OpenAIServerModel, LiteLLMModel, TransformersModel, InferenceClientModel)", ) parser.add_argument( "--model-id", @@ -45,6 +45,12 @@ def parse_arguments(): default="gpt-4o", help="The model ID to use for the specified model type", ) + parser.add_argument( + "--provider", + type=str, + default=None, + help="The inference provider to use for the model", + ) return parser.parse_args() @@ -57,7 +63,7 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2: previous_memory_step.observations_images = None png_bytes = driver.get_screenshot_as_png() - image = Image.open(BytesIO(png_bytes)) + image = PIL.Image.open(BytesIO(png_bytes)) print(f"Captured a browser screenshot: {image.size} pixels") memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists, important! 
@@ -187,23 +193,26 @@ def initialize_agent(model): """ -def main(): +def run_webagent(prompt: str, model_type: str, model_id: str, provider: str) -> None: # Load environment variables load_dotenv() - # Parse command line arguments - args = parse_arguments() - # Initialize the model based on the provided arguments - model = load_model(args.model_type, args.model_id) + model = load_model(model_type, model_id, provider=provider, api_base=None, api_key=None) global driver driver = initialize_driver() agent = initialize_agent(model) # Run the agent with the provided prompt - agent.python_executor("from helium import *", agent.state) - agent.run(args.prompt + helium_instructions) + agent.python_executor("from helium import *") + agent.run(prompt + helium_instructions) + + +def main() -> None: + # Parse command line arguments + args = parse_arguments() + run_webagent(args.prompt, args.model_type, args.model_id, args.provider) if __name__ == "__main__": diff --git a/tests/conftest.py b/tests/conftest.py index a3896e2db..cca27193a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,9 @@ from smolagents.monitoring import LogLevel +# Import fixture modules as plugins +pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"] + original_multi_step_agent_init = MultiStepAgent.__init__ diff --git a/tests/fixtures/000000039769.png b/tests/data/000000039769.png similarity index 100% rename from tests/fixtures/000000039769.png rename to tests/data/000000039769.png diff --git a/tests/fixtures/agents.py b/tests/fixtures/agents.py new file mode 100644 index 000000000..450d7016b --- /dev/null +++ b/tests/fixtures/agents.py @@ -0,0 +1,97 @@ +import pytest + + +AGENT_DICTS = { + "v1.9": { + "tools": [], + "model": { + "class": "InferenceClientModel", + "data": { + "last_input_token_count": None, + "last_output_token_count": None, + "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "provider": None, + }, + }, + "managed_agents": {}, + "prompt_templates": { + 
"system_prompt": "dummy system prompt", + "planning": { + "initial_facts": "dummy planning initial facts", + "initial_plan": "dummy planning initial plan", + "update_facts_pre_messages": "dummy planning update facts pre messages", + "update_facts_post_messages": "dummy planning update facts post messages", + "update_plan_pre_messages": "dummy planning update plan pre messages", + "update_plan_post_messages": "dummy planning update plan post messages", + }, + "managed_agent": { + "task": "dummy managed agent task", + "report": "dummy managed agent report", + }, + "final_answer": { + "pre_messages": "dummy final answer pre messages", + "post_messages": "dummy final answer post messages", + }, + }, + "max_steps": 10, + "verbosity_level": 2, + "grammar": None, + "planning_interval": 2, + "name": "test_agent", + "description": "dummy description", + "requirements": ["smolagents"], + "authorized_imports": ["pandas"], + }, + # Added: executor_type, executor_kwargs, max_print_outputs_length + "v1.10": { + "tools": [], + "model": { + "class": "InferenceClientModel", + "data": { + "last_input_token_count": None, + "last_output_token_count": None, + "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "provider": None, + }, + }, + "managed_agents": {}, + "prompt_templates": { + "system_prompt": "dummy system prompt", + "planning": { + "initial_facts": "dummy planning initial facts", + "initial_plan": "dummy planning initial plan", + "update_facts_pre_messages": "dummy planning update facts pre messages", + "update_facts_post_messages": "dummy planning update facts post messages", + "update_plan_pre_messages": "dummy planning update plan pre messages", + "update_plan_post_messages": "dummy planning update plan post messages", + }, + "managed_agent": { + "task": "dummy managed agent task", + "report": "dummy managed agent report", + }, + "final_answer": { + "pre_messages": "dummy final answer pre messages", + "post_messages": "dummy final answer post messages", + }, + }, + 
"max_steps": 10, + "verbosity_level": 2, + "grammar": None, + "planning_interval": 2, + "name": "test_agent", + "description": "dummy description", + "requirements": ["smolagents"], + "authorized_imports": ["pandas"], + "executor_type": "local", + "executor_kwargs": {}, + "max_print_outputs_length": None, + }, +} + + +@pytest.fixture +def get_agent_dict(): + def _get_agent_dict(agent_dict_key): + return AGENT_DICTS[agent_dict_key] + + return _get_agent_dict diff --git a/tests/fixtures/tools.py b/tests/fixtures/tools.py new file mode 100644 index 000000000..dae7ea576 --- /dev/null +++ b/tests/fixtures/tools.py @@ -0,0 +1,87 @@ +import pytest + +from smolagents.tools import Tool, tool + + +@pytest.fixture +def example_tool(): + @tool + def valid_tool_function(input: str) -> str: + """A valid tool function. + + Args: + input (str): Input string. + """ + return input.upper() + + return valid_tool_function + + +@pytest.fixture +def boolean_default_tool_class(): + class BooleanDefaultTool(Tool): + name = "boolean_default_tool" + description = "A tool with a boolean default parameter" + inputs = { + "text": {"type": "string", "description": "Input text"}, + "flag": {"type": "boolean", "description": "Boolean flag with default value", "nullable": True}, + } + output_type = "string" + + def forward(self, text: str, flag: bool = False) -> str: + return f"Text: {text}, Flag: {flag}" + + return BooleanDefaultTool() + + +@pytest.fixture +def boolean_default_tool_function(): + @tool + def boolean_default_tool(text: str, flag: bool = False) -> str: + """ + A tool with a boolean default parameter. 
+ + Args: + text: Input text + flag: Boolean flag with default value + """ + return f"Text: {text}, Flag: {flag}" + + return boolean_default_tool + + +@pytest.fixture +def optional_input_tool_class(): + class OptionalInputTool(Tool): + name = "optional_input_tool" + description = "A tool with an optional input parameter" + inputs = { + "required_text": {"type": "string", "description": "Required input text"}, + "optional_text": {"type": "string", "description": "Optional input text", "nullable": True}, + } + output_type = "string" + + def forward(self, required_text: str, optional_text: str | None = None) -> str: + if optional_text: + return f"{required_text} + {optional_text}" + return required_text + + return OptionalInputTool() + + +@pytest.fixture +def optional_input_tool_function(): + @tool + def optional_input_tool(required_text: str, optional_text: str | None = None) -> str: + """ + A tool with an optional input parameter. + + Args: + required_text: Required input text + optional_text: Optional input text + """ + if optional_text: + return f"{required_text} + {optional_text}" + return required_text + + return optional_input_tool diff --git a/tests/test_agents.py b/tests/test_agents.py index 376cc0869..826c2f2e4 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -12,18 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import io import os import tempfile import unittest import uuid +from contextlib import nullcontext as does_not_raise from pathlib import Path -from unittest.mock import MagicMock +from typing import Any +from unittest.mock import MagicMock, patch import pytest -from transformers.testing_utils import get_tests_dir +from huggingface_hub import ( + ChatCompletionOutputFunctionDefinition, + ChatCompletionOutputMessage, + ChatCompletionOutputToolCall, +) +from rich.console import Console +from smolagents import EMPTY_PROMPT_TEMPLATES from smolagents.agent_types import AgentImage, AgentText from smolagents.agents import ( + AgentError, AgentMaxStepsError, CodeAgent, MultiStepAgent, @@ -31,18 +41,20 @@ ToolCallingAgent, populate_template, ) -from smolagents.default_tools import DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool -from smolagents.memory import PlanningStep +from smolagents.default_tools import DuckDuckGoSearchTool, FinalAnswerTool, PythonInterpreterTool, VisitWebpageTool +from smolagents.memory import ActionStep, PlanningStep from smolagents.models import ( ChatMessage, ChatMessageToolCall, ChatMessageToolCallDefinition, - HfApiModel, + InferenceClientModel, MessageRole, + Model, TransformersModel, ) -from smolagents.tools import tool -from smolagents.utils import BASE_BUILTIN_MODULES +from smolagents.monitoring import AgentLogger, LogLevel +from smolagents.tools import Tool, tool +from smolagents.utils import BASE_BUILTIN_MODULES, AgentExecutionError, AgentGenerationError, AgentToolCallError def get_new_path(suffix="") -> str: @@ -50,8 +62,15 @@ def get_new_path(suffix="") -> str: return os.path.join(directory, str(uuid.uuid4()) + suffix) -class FakeToolCallModel: - def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): +@pytest.fixture +def agent_logger(): + return AgentLogger( + LogLevel.DEBUG, console=Console(record=True, no_color=True, force_terminal=False, file=io.StringIO()) + ) + + +class 
FakeToolCallModel(Model): + def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): if len(messages) < 3: return ChatMessage( role="assistant", @@ -80,8 +99,8 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm ) -class FakeToolCallModelImage: - def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): +class FakeToolCallModelImage(Model): + def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): if len(messages) < 3: return ChatMessage( role="assistant", @@ -111,8 +130,8 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm ) -class FakeToolCallModelVL: - def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): +class FakeToolCallModelVL(Model): + def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): if len(messages) < 3: return ChatMessage( role="assistant", @@ -145,38 +164,40 @@ def __call__(self, messages, tools_to_call_from=None, stop_sequences=None, gramm ) -def fake_code_model(messages, stop_sequences=None, grammar=None) -> str: - prompt = str(messages) - if "special_marker" not in prompt: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModel(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + prompt = str(messages) + if "special_marker" not in prompt: + return ChatMessage( + role="assistant", + content=""" Thought: I should multiply 2 by 3.6452. 
special_marker Code: ```py result = 2**3.6452 ``` """, - ) - else: # We're at step 2 - return ChatMessage( - role="assistant", - content=""" + ) + else: # We're at step 2 + return ChatMessage( + role="assistant", + content=""" Thought: I can now answer the initial question Code: ```py final_answer(7.2904) ``` """, - ) + ) -def fake_code_model_error(messages, stop_sequences=None) -> str: - prompt = str(messages) - if "special_marker" not in prompt: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelError(Model): + def generate(self, messages, stop_sequences=None): + prompt = str(messages) + if "special_marker" not in prompt: + return ChatMessage( + role="assistant", + content=""" Thought: I should multiply 2 by 3.6452. special_marker Code: ```py @@ -187,26 +208,27 @@ def error_function(): error_function() ``` """, - ) - else: # We're at step 2 - return ChatMessage( - role="assistant", - content=""" + ) + else: # We're at step 2 + return ChatMessage( + role="assistant", + content=""" Thought: I faced an error in the previous step. Code: ```py final_answer("got an error") ``` """, - ) + ) -def fake_code_model_syntax_error(messages, stop_sequences=None) -> str: - prompt = str(messages) - if "special_marker" not in prompt: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelSyntaxError(Model): + def generate(self, messages, stop_sequences=None): + prompt = str(messages) + if "special_marker" not in prompt: + return ChatMessage( + role="assistant", + content=""" Thought: I should multiply 2 by 3.6452. 
special_marker Code: ```py @@ -216,24 +238,25 @@ def fake_code_model_syntax_error(messages, stop_sequences=None) -> str: print("Ok, calculation done!") ``` """, - ) - else: # We're at step 2 - return ChatMessage( - role="assistant", - content=""" + ) + else: # We're at step 2 + return ChatMessage( + role="assistant", + content=""" Thought: I can now answer the initial question Code: ```py final_answer("got an error") ``` """, - ) + ) -def fake_code_model_import(messages, stop_sequences=None) -> str: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelImport(Model): + def generate(self, messages, stop_sequences=None): + return ChatMessage( + role="assistant", + content=""" Thought: I can answer the question Code: ```py @@ -241,15 +264,16 @@ def fake_code_model_import(messages, stop_sequences=None) -> str: final_answer("got an error") ``` """, - ) + ) -def fake_code_functiondef(messages, stop_sequences=None) -> str: - prompt = str(messages) - if "special_marker" not in prompt: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelFunctionDef(Model): + def generate(self, messages, stop_sequences=None): + prompt = str(messages) + if "special_marker" not in prompt: + return ChatMessage( + role="assistant", + content=""" Thought: Let's define the function. 
special_marker Code: ```py @@ -258,12 +282,12 @@ def fake_code_functiondef(messages, stop_sequences=None) -> str: def moving_average(x, w): return np.convolve(x, np.ones(w), 'valid') / w ``` -""", - ) - else: # We're at step 2 - return ChatMessage( - role="assistant", - content=""" + """, + ) + else: # We're at step 2 + return ChatMessage( + role="assistant", + content=""" Thought: I can now answer the initial question Code: ```py @@ -272,13 +296,14 @@ def moving_average(x, w): final_answer(res) ``` """, - ) + ) -def fake_code_model_single_step(messages, stop_sequences=None, grammar=None) -> str: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelSingleStep(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + return ChatMessage( + role="assistant", + content=""" Thought: I should multiply 2 by 3.6452. special_marker Code: ```py @@ -286,13 +311,14 @@ def fake_code_model_single_step(messages, stop_sequences=None, grammar=None) -> final_answer(result) ``` """, - ) + ) -def fake_code_model_no_return(messages, stop_sequences=None, grammar=None) -> str: - return ChatMessage( - role="assistant", - content=""" +class FakeCodeModelNoReturn(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + return ChatMessage( + role="assistant", + content=""" Thought: I should multiply 2 by 3.6452. special_marker Code: ```py @@ -300,10 +326,10 @@ def fake_code_model_no_return(messages, stop_sequences=None, grammar=None) -> st print(result) ``` """, - ) + ) -class AgentTests(unittest.TestCase): +class TestAgent: def test_fake_toolcalling_agent(self): agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel()) output = agent.run("What is 2 multiplied by 3.6452?") @@ -311,32 +337,35 @@ def test_fake_toolcalling_agent(self): assert "7.2904" in output assert agent.memory.steps[0].task == "What is 2 multiplied by 3.6452?" 
assert "7.2904" in agent.memory.steps[1].observations - assert agent.memory.steps[2].model_output is None + assert agent.memory.steps[2].model_output == "Called Tool: 'final_answer' with arguments: {'answer': '7.2904'}" - def test_toolcalling_agent_handles_image_tool_outputs(self): - from PIL import Image + def test_toolcalling_agent_handles_image_tool_outputs(self, shared_datadir): + import PIL.Image @tool - def fake_image_generation_tool(prompt: str) -> Image.Image: + def fake_image_generation_tool(prompt: str) -> PIL.Image.Image: """Tool that generates an image. Args: prompt: The prompt """ - return Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png") + + import PIL.Image + + return PIL.Image.open(shared_datadir / "000000039769.png") agent = ToolCallingAgent(tools=[fake_image_generation_tool], model=FakeToolCallModelImage()) output = agent.run("Make me an image.") assert isinstance(output, AgentImage) - assert isinstance(agent.state["image.png"], Image.Image) + assert isinstance(agent.state["image.png"], PIL.Image.Image) - def test_toolcalling_agent_handles_image_inputs(self): - from PIL import Image + def test_toolcalling_agent_handles_image_inputs(self, shared_datadir): + import PIL.Image - image = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png") # dummy input + image = PIL.Image.open(shared_datadir / "000000039769.png") # dummy input @tool - def fake_image_understanding_tool(prompt: str, image: Image.Image) -> str: + def fake_image_understanding_tool(prompt: str, image: PIL.Image.Image) -> str: """Tool that creates a caption for an image. Args: @@ -350,7 +379,7 @@ def fake_image_understanding_tool(prompt: str, image: Image.Image) -> str: assert output == "The image is a cat." 
def test_fake_code_agent(self): - agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model) + agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel()) output = agent.run("What is 2 multiplied by 3.6452?") assert isinstance(output, float) assert output == 7.2904 @@ -360,16 +389,15 @@ def test_fake_code_agent(self): ] def test_additional_args_added_to_task(self): - agent = CodeAgent(tools=[], model=fake_code_model) + agent = CodeAgent(tools=[], model=FakeCodeModel()) agent.run( "What is 2 multiplied by 3.6452?", additional_args={"instruction": "Remember this."}, ) assert "Remember this" in agent.task - assert "Remember this" in str(agent.input_messages) def test_reset_conversations(self): - agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model) + agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel()) output = agent.run("What is 2 multiplied by 3.6452?", reset=True) assert output == 7.2904 assert len(agent.memory.steps) == 3 @@ -382,33 +410,13 @@ def test_reset_conversations(self): assert output == 7.2904 assert len(agent.memory.steps) == 3 - def test_code_agent_code_errors_show_offending_line_and_error(self): - agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_error) - output = agent.run("What is 2 multiplied by 3.6452?") - assert isinstance(output, AgentText) - assert output == "got an error" - assert "Code execution failed at line 'error_function()'" in str(agent.memory.steps[1].error) - assert "ValueError" in str(agent.memory.steps) - - def test_code_agent_code_error_saves_previous_print_outputs(self): - agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_error) - agent.run("What is 2 multiplied by 3.6452?") - assert "Flag!" 
in str(agent.memory.steps[1].observations) - - def test_code_agent_syntax_error_show_offending_lines(self): - agent = CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model_syntax_error) - output = agent.run("What is 2 multiplied by 3.6452?") - assert isinstance(output, AgentText) - assert output == "got an error" - assert ' print("Failing due to unexpected indent")' in str(agent.memory.steps) - def test_setup_agent_with_empty_toolbox(self): ToolCallingAgent(model=FakeToolCallModel(), tools=[]) def test_fails_max_steps(self): agent = CodeAgent( tools=[PythonInterpreterTool()], - model=fake_code_model_no_return, # use this callable because it never ends + model=FakeCodeModelNoReturn(), # use this callable because it never ends max_steps=5, ) answer = agent.run("What is 2 multiplied by 3.6452?") @@ -416,49 +424,60 @@ def test_fails_max_steps(self): assert type(agent.memory.steps[-1].error) is AgentMaxStepsError assert isinstance(answer, str) + agent = CodeAgent( + tools=[PythonInterpreterTool()], + model=FakeCodeModelNoReturn(), # use this callable because it never ends + max_steps=5, + ) + answer = agent.run("What is 2 multiplied by 3.6452?", max_steps=3) + assert len(agent.memory.steps) == 5 # Task step + 3 action steps + Final answer + assert type(agent.memory.steps[-1].error) is AgentMaxStepsError + assert isinstance(answer, str) + def test_tool_descriptions_get_baked_in_system_prompt(self): tool = PythonInterpreterTool() tool.name = "fake_tool_name" tool.description = "fake_tool_description" - agent = CodeAgent(tools=[tool], model=fake_code_model) + agent = CodeAgent(tools=[tool], model=FakeCodeModel()) agent.run("Empty task") - assert tool.name in agent.system_prompt - assert tool.description in agent.system_prompt + assert agent.system_prompt is not None + assert f"def {tool.name}(" in agent.system_prompt + assert f'"""{tool.description}' in agent.system_prompt def test_module_imports_get_baked_in_system_prompt(self): - agent = CodeAgent(tools=[], 
model=fake_code_model) + agent = CodeAgent(tools=[], model=FakeCodeModel()) agent.run("Empty task") for module in BASE_BUILTIN_MODULES: assert module in agent.system_prompt def test_init_agent_with_different_toolsets(self): toolset_1 = [] - agent = CodeAgent(tools=toolset_1, model=fake_code_model) + agent = CodeAgent(tools=toolset_1, model=FakeCodeModel()) assert len(agent.tools) == 1 # when no tools are provided, only the final_answer tool is added by default toolset_2 = [PythonInterpreterTool(), PythonInterpreterTool()] with pytest.raises(ValueError) as e: - agent = CodeAgent(tools=toolset_2, model=fake_code_model) + agent = CodeAgent(tools=toolset_2, model=FakeCodeModel()) assert "Each tool or managed_agent should have a unique name!" in str(e) with pytest.raises(ValueError) as e: agent.name = "python_interpreter" agent.description = "empty" - CodeAgent(tools=[PythonInterpreterTool()], model=fake_code_model, managed_agents=[agent]) + CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel(), managed_agents=[agent]) assert "Each tool or managed_agent should have a unique name!" 
in str(e) # check that python_interpreter base tool does not get added to CodeAgent - agent = CodeAgent(tools=[], model=fake_code_model, add_base_tools=True) + agent = CodeAgent(tools=[], model=FakeCodeModel(), add_base_tools=True) assert len(agent.tools) == 3 # added final_answer tool + search + visit_webpage # check that python_interpreter base tool gets added to ToolCallingAgent - agent = ToolCallingAgent(tools=[], model=fake_code_model, add_base_tools=True) + agent = ToolCallingAgent(tools=[], model=FakeCodeModel(), add_base_tools=True) assert len(agent.tools) == 4 # added final_answer tool + search + visit_webpage def test_function_persistence_across_steps(self): agent = CodeAgent( tools=[], - model=fake_code_functiondef, + model=FakeCodeModelFunctionDef(), max_steps=2, additional_authorized_imports=["numpy"], ) @@ -466,58 +485,64 @@ def test_function_persistence_across_steps(self): assert res[0] == 0.5 def test_init_managed_agent(self): - agent = CodeAgent(tools=[], model=fake_code_functiondef, name="managed_agent", description="Empty") + agent = CodeAgent(tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty") assert agent.name == "managed_agent" assert agent.description == "Empty" def test_agent_description_gets_correctly_inserted_in_system_prompt(self): - managed_agent = CodeAgent(tools=[], model=fake_code_functiondef, name="managed_agent", description="Empty") + managed_agent = CodeAgent( + tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty" + ) manager_agent = CodeAgent( tools=[], - model=fake_code_functiondef, + model=FakeCodeModelFunctionDef(), managed_agents=[managed_agent], ) assert "You can also give tasks to team members." not in managed_agent.system_prompt assert "{{managed_agents_descriptions}}" not in managed_agent.system_prompt assert "You can also give tasks to team members." 
in manager_agent.system_prompt - def test_code_agent_missing_import_triggers_advice_in_error_log(self): - # Set explicit verbosity level to 1 to override the default verbosity level of -1 set in CI fixture - agent = CodeAgent(tools=[], model=fake_code_model_import, verbosity_level=1) - - with agent.logger.console.capture() as capture: - agent.run("Count to 3") - str_output = capture.get() - assert "`additional_authorized_imports`" in str_output.replace("\n", "") - - def test_replay_shows_logs(self): + def test_replay_shows_logs(self, agent_logger): agent = CodeAgent( - tools=[], model=fake_code_model_import, verbosity_level=0, additional_authorized_imports=["numpy"] + tools=[], + model=FakeCodeModelImport(), + verbosity_level=0, + additional_authorized_imports=["numpy"], + logger=agent_logger, ) agent.run("Count to 3") - with agent.logger.console.capture() as capture: - agent.replay() - str_output = capture.get().replace("\n", "") + str_output = agent_logger.console.export_text() + assert "New run" in str_output - assert "Agent output:" in str_output assert 'final_answer("got' in str_output assert "```" in str_output + agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel(), verbosity_level=0) + agent.logger = agent_logger + + agent.run("What is 2 multiplied by 3.6452?") + agent.replay() + + str_output = agent_logger.console.export_text() + assert "Called Tool" in str_output + assert "arguments" in str_output + def test_code_nontrivial_final_answer_works(self): - def fake_code_model_final_answer(messages, stop_sequences=None, grammar=None): - return ChatMessage( - role="assistant", - content="""Code: + class FakeCodeModelFinalAnswer(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + return ChatMessage( + role="assistant", + content="""Code: ```py def nested_answer(): final_answer("Correct!") nested_answer() ```""", - ) + ) - agent = CodeAgent(tools=[], model=fake_code_model_final_answer) + agent = 
CodeAgent(tools=[], model=FakeCodeModelFinalAnswer()) output = agent.run("Count to 3") assert output == "Correct!" @@ -541,9 +566,10 @@ def weather_api(location: str, celsius: bool = False) -> str: device_map="auto", do_sample=False, ) - agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1) - agent.run("What's the weather in Paris?") - assert agent.memory.steps[0].task == "What's the weather in Paris?" + agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1, verbosity_level=10) + task = "What is the weather in Paris? " + agent.run(task) + assert agent.memory.steps[0].task == task assert agent.memory.steps[1].tool_calls[0].name == "weather_api" step_memory_dict = agent.memory.get_succinct_steps()[1] assert step_memory_dict["model_output_message"].tool_calls[0].function.name == "weather_api" @@ -554,31 +580,109 @@ def test_final_answer_checks(self): def check_always_fails(final_answer, agent_memory): assert False, "Error raised in check" - agent = CodeAgent(model=fake_code_model, tools=[], final_answer_checks=[check_always_fails]) + agent = CodeAgent(model=FakeCodeModel(), tools=[], final_answer_checks=[check_always_fails]) agent.run("Dummy task.") assert "Error raised in check" in str(agent.write_memory_to_messages()) + def test_generation_errors_are_raised(self): + class FakeCodeModel(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + assert False, "Generation failed" + + agent = CodeAgent(model=FakeCodeModel(), tools=[]) + with pytest.raises(AgentGenerationError) as e: + agent.run("Dummy task.") + assert len(agent.memory.steps) == 2 + assert "Generation failed" in str(e) + + +class CustomFinalAnswerTool(FinalAnswerTool): + def forward(self, answer) -> str: + return answer + "CUSTOM" + + +class MockTool(Tool): + def __init__(self, name): + self.name = name + self.description = "Mock tool description" + self.inputs = {} + self.output_type = "string" + + def forward(self): + return "Mock tool output" + + 
+class MockAgent: + def __init__(self, name, tools, description="Mock agent description"): + self.name = name + self.tools = {t.name: t for t in tools} + self.description = description + + +class DummyMultiStepAgent(MultiStepAgent): + def step(self, memory_step: ActionStep) -> None | Any: + return super().step(memory_step) + + def initialize_system_prompt(self): + pass + class TestMultiStepAgent: def test_instantiation_disables_logging_to_terminal(self): fake_model = MagicMock() - agent = MultiStepAgent(tools=[], model=fake_model) + agent = DummyMultiStepAgent(tools=[], model=fake_model) assert agent.logger.level == -1, "logging to terminal should be disabled for testing using a fixture" def test_instantiation_with_prompt_templates(self, prompt_templates): - agent = MultiStepAgent(tools=[], model=MagicMock(), prompt_templates=prompt_templates) + agent = DummyMultiStepAgent(tools=[], model=MagicMock(), prompt_templates=prompt_templates) assert agent.prompt_templates == prompt_templates assert agent.prompt_templates["system_prompt"] == "This is a test system prompt." 
assert "managed_agent" in agent.prompt_templates assert agent.prompt_templates["managed_agent"]["task"] == "Task for {{name}}: {{task}}" assert agent.prompt_templates["managed_agent"]["report"] == "Report for {{name}}: {{final_answer}}" + @pytest.mark.parametrize( + "tools, expected_final_answer_tool", + [([], FinalAnswerTool), ([CustomFinalAnswerTool()], CustomFinalAnswerTool)], + ) + def test_instantiation_with_final_answer_tool(self, tools, expected_final_answer_tool): + agent = DummyMultiStepAgent(tools=tools, model=MagicMock()) + assert "final_answer" in agent.tools + assert isinstance(agent.tools["final_answer"], expected_final_answer_tool) + + def test_logs_display_thoughts_even_if_error(self): + class FakeJsonModelNoCall(Model): + def generate(self, messages, stop_sequences=None, tools_to_call_from=None): + return ChatMessage( + role="assistant", + content="""I don't want to call tools today""", + tool_calls=None, + raw="""I don't want to call tools today""", + ) + + agent_toolcalling = ToolCallingAgent(model=FakeJsonModelNoCall(), tools=[], max_steps=1, verbosity_level=10) + with agent_toolcalling.logger.console.capture() as capture: + agent_toolcalling.run("Dummy task") + assert "don't" in capture.get() and "want" in capture.get() + + class FakeCodeModelNoCall(Model): + def generate(self, messages, stop_sequences=None): + return ChatMessage( + role="assistant", + content="""I don't want to write an action today""", + ) + + agent_code = CodeAgent(model=FakeCodeModelNoCall(), tools=[], max_steps=1, verbosity_level=10) + with agent_code.logger.console.capture() as capture: + agent_code.run("Dummy task") + assert "don't" in capture.get() and "want" in capture.get() + def test_step_number(self): fake_model = MagicMock() fake_model.last_input_token_count = 10 fake_model.last_output_token_count = 20 max_steps = 2 - agent = MultiStepAgent(tools=[], model=fake_model, max_steps=max_steps) + agent = DummyMultiStepAgent(tools=[], model=fake_model, 
max_steps=max_steps) assert hasattr(agent, "step_number"), "step_number attribute should be defined" assert agent.step_number == 0, "step_number should be initialized to 0" agent.run("Test task") @@ -591,20 +695,12 @@ def test_step_number(self): ( 1, [ - [{"role": MessageRole.USER, "content": [{"type": "text", "text": "INITIAL_FACTS_USER_PROMPT"}]}], [{"role": MessageRole.USER, "content": [{"type": "text", "text": "INITIAL_PLAN_USER_PROMPT"}]}], ], ), ( 2, [ - [ - { - "role": MessageRole.SYSTEM, - "content": [{"type": "text", "text": "UPDATE_FACTS_SYSTEM_PROMPT"}], - }, - {"role": MessageRole.USER, "content": [{"type": "text", "text": "UPDATE_FACTS_USER_PROMPT"}]}, - ], [ { "role": MessageRole.SYSTEM, @@ -623,22 +719,17 @@ def test_planning_step(self, step, expected_messages_list): model=fake_model, ) task = "Test task" - agent.planning_step(task, is_first_step=(step == 1), step=step) + planning_step = agent._generate_planning_step(task, is_first_step=(step == 1), step=step) expected_message_texts = { - "INITIAL_FACTS_USER_PROMPT": populate_template( - agent.prompt_templates["planning"]["initial_facts"], variables=dict(task=task) - ), "INITIAL_PLAN_USER_PROMPT": populate_template( agent.prompt_templates["planning"]["initial_plan"], variables=dict( task=task, tools=agent.tools, managed_agents=agent.managed_agents, - answer_facts=agent.memory.steps[0].model_output_message_facts.content, + answer_facts=planning_step.model_output_message.content, ), ), - "UPDATE_FACTS_SYSTEM_PROMPT": agent.prompt_templates["planning"]["update_facts_pre_messages"], - "UPDATE_FACTS_USER_PROMPT": agent.prompt_templates["planning"]["update_facts_post_messages"], "UPDATE_PLAN_SYSTEM_PROMPT": populate_template( agent.prompt_templates["planning"]["update_plan_pre_messages"], variables=dict(task=task) ), @@ -648,7 +739,7 @@ def test_planning_step(self, step, expected_messages_list): task=task, tools=agent.tools, managed_agents=agent.managed_agents, - 
facts_update=agent.memory.steps[0].model_output_message_facts.content, + facts_update=planning_step.model_output_message.content, remaining_steps=agent.max_steps - step, ), ), @@ -657,8 +748,6 @@ def test_planning_step(self, step, expected_messages_list): for expected_message in expected_messages: for expected_content in expected_message["content"]: expected_content["text"] = expected_message_texts[expected_content["text"]] - assert len(agent.memory.steps) == 1 - planning_step = agent.memory.steps[0] assert isinstance(planning_step, PlanningStep) expected_model_input_messages = expected_messages_list[0] model_input_messages = planning_step.model_input_messages @@ -675,7 +764,7 @@ def test_planning_step(self, step, expected_messages_list): for content, expected_content in zip(message["content"], expected_message["content"]): assert content == expected_content # Test calls to model - assert len(fake_model.call_args_list) == 2 + assert len(fake_model.call_args_list) == 1 for call_args, expected_messages in zip(fake_model.call_args_list, expected_messages_list): assert len(call_args.args) == 1 messages = call_args.args[0] @@ -760,6 +849,196 @@ def test_provide_final_answer(self, images, expected_messages_list): for content, expected_content in zip(message["content"], expected_message["content"]): assert content == expected_content + def test_interrupt(self): + fake_model = MagicMock() + fake_model.return_value.content = "Model output." 
+ fake_model.last_input_token_count = None + + def interrupt_callback(memory_step, agent): + agent.interrupt() + + agent = CodeAgent( + tools=[], + model=fake_model, + step_callbacks=[interrupt_callback], + ) + with pytest.raises(AgentError) as e: + agent.run("Test task") + assert "Agent interrupted" in str(e) + + @pytest.mark.parametrize( + "tools, managed_agents, name, expectation", + [ + # Valid case: no duplicates + ( + [MockTool("tool1"), MockTool("tool2")], + [MockAgent("agent1", [MockTool("tool3")])], + "test_agent", + does_not_raise(), + ), + # Invalid case: duplicate tool names + ([MockTool("tool1"), MockTool("tool1")], [], "test_agent", pytest.raises(ValueError)), + # Invalid case: tool name same as managed agent name + ( + [MockTool("tool1")], + [MockAgent("tool1", [MockTool("final_answer")])], + "test_agent", + pytest.raises(ValueError), + ), + # Valid case: tool name same as managed agent's tool name + ([MockTool("tool1")], [MockAgent("agent1", [MockTool("tool1")])], "test_agent", does_not_raise()), + # Invalid case: duplicate managed agent name and managed agent tool name + ([MockTool("tool1")], [], "tool1", pytest.raises(ValueError)), + # Valid case: duplicate tool names across managed agents + ( + [MockTool("tool1")], + [ + MockAgent("agent1", [MockTool("tool2"), MockTool("final_answer")]), + MockAgent("agent2", [MockTool("tool2"), MockTool("final_answer")]), + ], + "test_agent", + does_not_raise(), + ), + ], + ) + def test_validate_tools_and_managed_agents(self, tools, managed_agents, name, expectation): + fake_model = MagicMock() + with expectation: + DummyMultiStepAgent( + tools=tools, + model=fake_model, + name=name, + managed_agents=managed_agents, + ) + + def test_from_dict(self): + # Create a test agent dictionary + agent_dict = { + "model": {"class": "TransformersModel", "data": {"model_id": "test/model"}}, + "tools": [ + { + "name": "valid_tool_function", + "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass 
SimpleTool(Tool):\n name = "valid_tool_function"\n description = "A valid tool function."\n inputs = {"input":{"type":"string","description":"Input string."}}\n output_type = "string"\n\n def forward(self, input: str) -> str:\n """A valid tool function.\n\n Args:\n input (str): Input string.\n """\n return input.upper()', + "requirements": {"smolagents"}, + } + ], + "managed_agents": {}, + "prompt_templates": EMPTY_PROMPT_TEMPLATES, + "max_steps": 15, + "verbosity_level": 2, + "grammar": {"test": "grammar"}, + "planning_interval": 3, + "name": "test_agent", + "description": "Test agent description", + } + + # Call from_dict + with patch("smolagents.models.TransformersModel") as mock_model_class: + mock_model_instance = mock_model_class.from_dict.return_value + agent = DummyMultiStepAgent.from_dict(agent_dict) + + # Verify the agent was created correctly + assert agent.model == mock_model_instance + assert mock_model_class.from_dict.call_args.args[0] == {"model_id": "test/model"} + assert agent.max_steps == 15 + assert agent.logger.level == 2 + assert agent.grammar == {"test": "grammar"} + assert agent.planning_interval == 3 + assert agent.name == "test_agent" + assert agent.description == "Test agent description" + # Verify the tool was created correctly + assert sorted(agent.tools.keys()) == ["final_answer", "valid_tool_function"] + assert agent.tools["valid_tool_function"].name == "valid_tool_function" + assert agent.tools["valid_tool_function"].description == "A valid tool function." 
+ assert agent.tools["valid_tool_function"].inputs == { + "input": {"type": "string", "description": "Input string."} + } + assert agent.tools["valid_tool_function"].output_type == "string" + assert agent.tools["valid_tool_function"]("test") == "TEST" + + # Test overriding with kwargs + with patch("smolagents.models.TransformersModel") as mock_model_class: + agent = DummyMultiStepAgent.from_dict(agent_dict, max_steps=30) + assert agent.max_steps == 30 + + +class TestToolCallingAgent(unittest.TestCase): + @patch("huggingface_hub.InferenceClient") + def test_toolcalling_agent_api(self, mock_inference_client): + mock_client = mock_inference_client.return_value + mock_response = mock_client.chat_completion.return_value + mock_response.choices[0].message = ChatCompletionOutputMessage( + role="assistant", content='{"name": "weather_api", "arguments": {"location": "Paris", "date": "today"}}' + ) + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 20 + + model = InferenceClientModel(model_id="test-model") + + from smolagents import tool + + @tool + def weather_api(location: str, date: str) -> str: + """ + Gets the weather in the next days at given location. + Args: + location: the location + date: the date + """ + return f"The weather in {location} on date:{date} is sunny." + + agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1) + agent.run("What's the weather in Paris?") + assert agent.memory.steps[0].task == "What's the weather in Paris?" + assert agent.memory.steps[1].tool_calls[0].name == "weather_api" + assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"} + assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny." 
+ + mock_response.choices[0].message = ChatCompletionOutputMessage( + role="assistant", + content=None, + tool_calls=[ + ChatCompletionOutputToolCall( + function=ChatCompletionOutputFunctionDefinition( + name="weather_api", arguments='{"location": "Paris", "date": "today"}' + ), + id="call_0", + type="function", + ) + ], + ) + + agent.run("What's the weather in Paris?") + assert agent.memory.steps[0].task == "What's the weather in Paris?" + assert agent.memory.steps[1].tool_calls[0].name == "weather_api" + assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"} + assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny." + + @patch("huggingface_hub.InferenceClient") + def test_toolcalling_agent_api_misformatted_output(self, mock_inference_client): + """Test that even misformatted json blobs don't interrupt the run for a ToolCallingAgent.""" + mock_client = mock_inference_client.return_value + mock_response = mock_client.chat_completion.return_value + mock_response.choices[0].message = ChatCompletionOutputMessage( + role="assistant", content='{"name": weather_api", "arguments": {"location": "Paris", "date": "today"}}' + ) + + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 20 + + model = InferenceClientModel(model_id="test-model") + + logger = AgentLogger(console=Console(markup=False, no_color=True)) + + agent = ToolCallingAgent(model=model, tools=[], max_steps=2, verbosity_level=1, logger=logger) + with agent.logger.console.capture() as capture: + agent.run("What's the weather in Paris?") + assert agent.memory.steps[0].task == "What's the weather in Paris?" 
+ assert agent.memory.steps[1].tool_calls is None + assert "The JSON blob you used is invalid" in agent.memory.steps[1].error.message + assert "Error while parsing" in capture.get() + assert len(agent.memory.steps) == 4 + class TestCodeAgent: @pytest.mark.parametrize("provide_run_summary", [False, True]) @@ -780,10 +1059,188 @@ def test_call_with_provide_run_summary(self, provide_run_summary): ) assert result == expected_summary + def test_errors_logging(self): + class FakeCodeModel(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + return ChatMessage(role="assistant", content="Code:\n```py\nsecret=3;['1', '2'][secret]\n```") + + agent = CodeAgent(tools=[], model=FakeCodeModel(), verbosity_level=1) + + with agent.logger.console.capture() as capture: + agent.run("Test request") + assert "secret\\\\" in repr(capture.get()) + + def test_missing_import_triggers_advice_in_error_log(self): + # Set explicit verbosity level to 1 to override the default verbosity level of -1 set in CI fixture + agent = CodeAgent(tools=[], model=FakeCodeModelImport(), verbosity_level=1) + + with agent.logger.console.capture() as capture: + agent.run("Count to 3") + str_output = capture.get() + assert "`additional_authorized_imports`" in str_output.replace("\n", "") -class MultiAgentsTests(unittest.TestCase): - def test_multiagents_save(self): - model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=2096, temperature=0.5) + def test_errors_show_offending_line_and_error(self): + agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError()) + output = agent.run("What is 2 multiplied by 3.6452?") + assert isinstance(output, AgentText) + assert output == "got an error" + assert "Code execution failed at line 'error_function()'" in str(agent.memory.steps[1].error) + assert "ValueError" in str(agent.memory.steps) + + def test_error_saves_previous_print_outputs(self): + agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError(), 
verbosity_level=10) + agent.run("What is 2 multiplied by 3.6452?") + assert "Flag!" in str(agent.memory.steps[1].observations) + + def test_syntax_error_show_offending_lines(self): + agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelSyntaxError()) + output = agent.run("What is 2 multiplied by 3.6452?") + assert isinstance(output, AgentText) + assert output == "got an error" + assert ' print("Failing due to unexpected indent")' in str(agent.memory.steps) + + def test_end_code_appending(self): + # Checking original output message + orig_output = FakeCodeModelNoReturn().generate([]) + assert not orig_output.content.endswith("") + + # Checking the step output + agent = CodeAgent( + tools=[PythonInterpreterTool()], + model=FakeCodeModelNoReturn(), + max_steps=1, + ) + answer = agent.run("What is 2 multiplied by 3.6452?") + assert answer + + memory_steps = agent.memory.steps + actions_steps = [s for s in memory_steps if isinstance(s, ActionStep)] + + outputs = [s.model_output for s in actions_steps if s.model_output] + assert outputs + assert all(o.endswith("") for o in outputs) + + messages = [s.model_output_message for s in actions_steps if s.model_output_message] + assert messages + assert all(m.content.endswith("") for m in messages) + + def test_change_tools_after_init(self): + from smolagents import tool + + @tool + def fake_tool_1() -> str: + """Fake tool""" + return "1" + + @tool + def fake_tool_2() -> str: + """Fake tool""" + return "2" + + class FakeCodeModel(Model): + def generate(self, messages, stop_sequences=None, grammar=None): + return ChatMessage(role="assistant", content="Code:\n```py\nfinal_answer(fake_tool_1())\n```") + + agent = CodeAgent(tools=[fake_tool_1], model=FakeCodeModel()) + + agent.tools["final_answer"] = CustomFinalAnswerTool() + agent.tools["fake_tool_1"] = fake_tool_2 + + answer = agent.run("Fake task.") + assert answer == "2CUSTOM" + + @pytest.mark.parametrize("agent_dict_version", ["v1.9", "v1.10"]) + def 
test_from_folder(self, agent_dict_version, get_agent_dict): + agent_dict = get_agent_dict(agent_dict_version) + with ( + patch("smolagents.agents.Path") as mock_path, + patch("smolagents.models.InferenceClientModel") as mock_model, + ): + import json + + mock_path.return_value.__truediv__.return_value.read_text.return_value = json.dumps(agent_dict) + mock_model.from_dict.return_value.model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" + agent = CodeAgent.from_folder("ignored_dummy_folder") + assert isinstance(agent, CodeAgent) + assert agent.name == "test_agent" + assert agent.description == "dummy description" + assert agent.max_steps == 10 + assert agent.planning_interval == 2 + assert agent.grammar is None + assert agent.additional_authorized_imports == ["pandas"] + assert "pandas" in agent.authorized_imports + assert agent.executor_type == "local" + assert agent.executor_kwargs == {} + assert agent.max_print_outputs_length is None + assert agent.managed_agents == {} + assert set(agent.tools.keys()) == {"final_answer"} + assert agent.model == mock_model.from_dict.return_value + assert mock_model.from_dict.call_args.args[0]["model_id"] == "Qwen/Qwen2.5-Coder-32B-Instruct" + assert agent.model.model_id == "Qwen/Qwen2.5-Coder-32B-Instruct" + assert agent.logger.level == 2 + assert agent.prompt_templates["system_prompt"] == "dummy system prompt" + + def test_from_dict(self): + # Create a test agent dictionary + agent_dict = { + "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}}, + "tools": [ + { + "name": "valid_tool_function", + "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass SimpleTool(Tool):\n name = "valid_tool_function"\n description = "A valid tool function."\n inputs = {"input":{"type":"string","description":"Input string."}}\n output_type = "string"\n\n def forward(self, input: str) -> str:\n """A valid tool function.\n\n Args:\n input (str): Input string.\n """\n return 
input.upper()', + "requirements": {"smolagents"}, + } + ], + "managed_agents": {}, + "prompt_templates": EMPTY_PROMPT_TEMPLATES, + "max_steps": 15, + "verbosity_level": 2, + "grammar": None, + "planning_interval": 3, + "name": "test_code_agent", + "description": "Test code agent description", + "authorized_imports": ["pandas", "numpy"], + "executor_type": "local", + "executor_kwargs": {"max_workers": 2}, + "max_print_outputs_length": 1000, + } + + # Call from_dict + with patch("smolagents.models.InferenceClientModel") as mock_model_class: + mock_model_instance = mock_model_class.from_dict.return_value + agent = CodeAgent.from_dict(agent_dict) + + # Verify the agent was created correctly with CodeAgent-specific parameters + assert agent.model == mock_model_instance + assert agent.additional_authorized_imports == ["pandas", "numpy"] + assert agent.executor_type == "local" + assert agent.executor_kwargs == {"max_workers": 2} + assert agent.max_print_outputs_length == 1000 + + # Test with missing optional parameters + minimal_agent_dict = { + "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}}, + "tools": [], + "managed_agents": {}, + } + + with patch("smolagents.models.InferenceClientModel"): + agent = CodeAgent.from_dict(minimal_agent_dict) + # Verify defaults are used + assert agent.max_steps == 20 # default from MultiStepAgent.__init__ + + # Test overriding with kwargs + with patch("smolagents.models.InferenceClientModel"): + agent = CodeAgent.from_dict( + agent_dict, additional_authorized_imports=["matplotlib"], executor_kwargs={"max_workers": 4} + ) + assert agent.additional_authorized_imports == ["matplotlib"] + assert agent.executor_kwargs == {"max_workers": 4} + + +class TestMultiAgents: + def test_multiagents_save(self, tmp_path): + model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=2096, temperature=0.5) web_agent = ToolCallingAgent( model=model, @@ -799,8 +1256,10 @@ def 
test_multiagents_save(self): additional_authorized_imports=["pandas", "datetime"], managed_agents=[web_agent, code_agent], max_print_outputs_length=1000, + executor_type="local", + executor_kwargs={"max_workers": 2}, ) - agent.save("agent_export") + agent.save(tmp_path) expected_structure = { "managed_agents": { @@ -829,24 +1288,25 @@ def verify_structure(current_path: Path, structure: dict): assert file_path.exists(), f"File {file_path} does not exist" assert file_path.is_file(), f"{file_path} is not a file" - verify_structure(Path("agent_export"), expected_structure) + verify_structure(tmp_path, expected_structure) # Test that re-loaded agents work as expected. - agent2 = CodeAgent.from_folder("agent_export", planning_interval=5) + agent2 = CodeAgent.from_folder(tmp_path, planning_interval=5) assert agent2.planning_interval == 5 # Check that kwargs are used assert set(agent2.authorized_imports) == set(["pandas", "datetime"] + BASE_BUILTIN_MODULES) assert agent2.max_print_outputs_length == 1000 - assert agent2.use_e2b_executor is False + assert agent2.executor_type == "local" + assert agent2.executor_kwargs == {"max_workers": 2} assert ( agent2.managed_agents["web_agent"].tools["web_search"].max_results == 10 ) # For now tool init parameters are forgotten assert agent2.model.kwargs["temperature"] == pytest.approx(0.5) def test_multiagents(self): - class FakeModelMultiagentsManagerAgent: + class FakeModelMultiagentsManagerAgent(Model): model_id = "fake_model" - def __call__( + def generate( self, messages, stop_sequences=None, @@ -911,10 +1371,10 @@ def __call__( manager_model = FakeModelMultiagentsManagerAgent() - class FakeModelMultiagentsManagedAgent: + class FakeModelMultiagentsManagedAgent(Model): model_id = "fake_model" - def __call__( + def generate( self, messages, tools_to_call_from=None, @@ -923,7 +1383,7 @@ def __call__( ): return ChatMessage( role="assistant", - content="", + content="Here is the secret content: FLAG1", tool_calls=[ ChatMessageToolCall( 
id="call_0", @@ -944,6 +1404,7 @@ def __call__( max_steps=10, name="search_agent", description="Runs web searches for you. Give it your request as an argument. Make the request as detailed as needed, you can ask for thorough reports", + verbosity_level=2, ) manager_code_agent = CodeAgent( @@ -962,11 +1423,15 @@ def __call__( managed_agents=[web_agent], ) - report = manager_toolcalling_agent.run("Fake question.") + with web_agent.logger.console.capture() as capture: + report = manager_toolcalling_agent.run("Fake question.") assert report == "Final report." + assert "FLAG1" in capture.get() # Check that managed agent's output is properly logged # Test that visualization works - manager_code_agent.visualize() + with manager_toolcalling_agent.logger.console.capture() as capture: + manager_toolcalling_agent.visualize() + assert "โ”œโ”€โ”€" in capture.get() @pytest.fixture @@ -974,4 +1439,55 @@ def prompt_templates(): return { "system_prompt": "This is a test system prompt.", "managed_agent": {"task": "Task for {{name}}: {{task}}", "report": "Report for {{name}}: {{final_answer}}"}, + "planning": { + "initial_plan": "The plan.", + "update_plan_pre_messages": "custom", + "update_plan_post_messages": "custom", + }, + "final_answer": {"pre_messages": "custom", "post_messages": "custom"}, } + + +@pytest.mark.parametrize( + "arguments", + [ + {}, + {"arg": "bar"}, + {None: None}, + [1, 2, 3], + ], +) +def test_tool_calling_agents_raises_tool_call_error_being_invoked_with_wrong_arguments(arguments): + @tool + def _sample_tool(prompt: str) -> str: + """Tool that returns same string + + Args: + prompt: The string to return + Returns: + The same string + """ + + return prompt + + agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool]) + with pytest.raises(AgentToolCallError): + agent.execute_tool_call(_sample_tool.name, arguments) + + +def test_tool_calling_agents_raises_agent_execution_error_when_tool_raises(): + @tool + def _sample_tool(_: str) -> float: + 
"""Tool that fails + + Args: + _: The pointless string + Returns: + Some number + """ + + return 1 / 0 + + agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool]) + with pytest.raises(AgentExecutionError): + agent.execute_tool_call(_sample_tool.name, "sample") diff --git a/tests/test_all_docs.py b/tests/test_all_docs.py index 0786e9138..0784af413 100644 --- a/tests/test_all_docs.py +++ b/tests/test_all_docs.py @@ -21,7 +21,6 @@ import tempfile import traceback from pathlib import Path -from typing import List import pytest from dotenv import load_dotenv @@ -33,7 +32,7 @@ class SubprocessCallException(Exception): pass -def run_command(command: List[str], return_stdout=False, env=None): +def run_command(command: list[str], return_stdout=False, env=None): """ Runs command with subprocess.check_output and returns stdout if requested. Properly captures and handles errors during command execution. @@ -61,14 +60,14 @@ class DocCodeExtractor: """Handles extraction and validation of Python code from markdown files.""" @staticmethod - def extract_python_code(content: str) -> List[str]: + def extract_python_code(content: str) -> list[str]: """Extract Python code blocks from markdown content.""" pattern = r"```(?:python|py)\n(.*?)\n```" matches = re.finditer(pattern, content, re.DOTALL) return [match.group(1).strip() for match in matches] @staticmethod - def create_test_script(code_blocks: List[str], tmp_dir: str) -> Path: + def create_test_script(code_blocks: list[str], tmp_dir: str) -> Path: """Create a temporary Python script from code blocks.""" combined_code = "\n\n".join(code_blocks) assert len(combined_code) > 0, "Code is empty!" 
@@ -80,6 +79,7 @@ def create_test_script(code_blocks: List[str], tmp_dir: str) -> Path: return tmp_file +# Skip: slow tests + require API keys @require_run_all class TestDocs: """Test case for documentation code testing.""" diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 000000000..bded39665 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,112 @@ +from unittest.mock import patch + +import pytest + +from smolagents.cli import load_model +from smolagents.local_python_executor import LocalPythonExecutor +from smolagents.models import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel + + +@pytest.fixture +def set_env_vars(monkeypatch): + monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key") + monkeypatch.setenv("HF_TOKEN", "test_hf_api_key") + + +def test_load_model_openai_server_model(set_env_vars): + with patch("openai.OpenAI") as MockOpenAI: + model = load_model("OpenAIServerModel", "test_model_id") + assert isinstance(model, OpenAIServerModel) + assert model.model_id == "test_model_id" + assert MockOpenAI.call_count == 1 + assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1" + assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key" + + +def test_load_model_litellm_model(): + model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com") + assert isinstance(model, LiteLLMModel) + assert model.api_key == "test_api_key" + assert model.api_base == "https://api.test.com" + assert model.model_id == "test_model_id" + + +def test_load_model_transformers_model(): + with ( + patch( + "transformers.AutoModelForImageTextToText.from_pretrained", + side_effect=ValueError("Unrecognized configuration class"), + ), + patch("transformers.AutoModelForCausalLM.from_pretrained"), + patch("transformers.AutoTokenizer.from_pretrained"), + ): + model = load_model("TransformersModel", "test_model_id") + assert isinstance(model, 
TransformersModel) + assert model.model_id == "test_model_id" + + +def test_load_model_hf_api_model(set_env_vars): + with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient: + model = load_model("InferenceClientModel", "test_model_id") + assert isinstance(model, InferenceClientModel) + assert model.model_id == "test_model_id" + assert huggingface_hub_InferenceClient.call_count == 1 + assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key" + + +def test_load_model_invalid_model_type(): + with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"): + load_model("InvalidModel", "test_model_id") + + +def test_cli_main(capsys): + with patch("smolagents.cli.load_model") as mock_load_model: + mock_load_model.return_value = "mock_model" + with patch("smolagents.cli.CodeAgent") as mock_code_agent: + from smolagents.cli import run_smolagent + + run_smolagent("test_prompt", [], "InferenceClientModel", "test_model_id", provider="hf-inference") + # load_model + assert len(mock_load_model.call_args_list) == 1 + assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id") + assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None, "provider": "hf-inference"} + # CodeAgent + assert len(mock_code_agent.call_args_list) == 1 + assert mock_code_agent.call_args.args == () + assert mock_code_agent.call_args.kwargs == { + "tools": [], + "model": "mock_model", + "additional_authorized_imports": None, + } + # agent.run + assert len(mock_code_agent.return_value.run.call_args_list) == 1 + assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",) + # print + captured = capsys.readouterr() + assert "Running agent with these tools: []" in captured.out + + +def test_vision_web_browser_main(): + with patch("smolagents.vision_web_browser.helium"): + with patch("smolagents.vision_web_browser.load_model") as mock_load_model: + mock_load_model.return_value = "mock_model" 
+ with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent: + from smolagents.vision_web_browser import helium_instructions, run_webagent + + run_webagent("test_prompt", "InferenceClientModel", "test_model_id", provider="hf-inference") + # load_model + assert len(mock_load_model.call_args_list) == 1 + assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id") + # CodeAgent + assert len(mock_code_agent.call_args_list) == 1 + assert mock_code_agent.call_args.args == () + assert len(mock_code_agent.call_args.kwargs["tools"]) == 4 + assert mock_code_agent.call_args.kwargs["model"] == "mock_model" + assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"] + # agent.python_executor + assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1 + assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",) + assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False) + # agent.run + assert len(mock_code_agent.return_value.run.call_args_list) == 1 + assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,) diff --git a/tests/test_default_tools.py b/tests/test_default_tools.py index 5ff436ef3..3f3fad49c 100644 --- a/tests/test_default_tools.py +++ b/tests/test_default_tools.py @@ -17,7 +17,13 @@ import pytest from smolagents.agent_types import _AGENT_TYPE_MAPPING -from smolagents.default_tools import DuckDuckGoSearchTool, PythonInterpreterTool, SpeechToTextTool, VisitWebpageTool +from smolagents.default_tools import ( + DuckDuckGoSearchTool, + PythonInterpreterTool, + SpeechToTextTool, + VisitWebpageTool, + WikipediaSearchTool, +) from .test_tools import ToolTesterMixin @@ -34,24 +40,24 @@ def test_ddgs_with_kwargs(self): assert isinstance(result, str) -class PythonInterpreterToolTester(unittest.TestCase, ToolTesterMixin): - def setUp(self): +class TestPythonInterpreterTool(ToolTesterMixin): + 
def setup_method(self):
         self.tool = PythonInterpreterTool(authorized_imports=["numpy"])
         self.tool.setup()
 
     def test_exact_match_arg(self):
         result = self.tool("(2 / 2) * 4")
-        self.assertEqual(result, "Stdout:\n\nOutput: 4.0")
+        assert result == "Stdout:\n\nOutput: 4.0"
 
     def test_exact_match_kwarg(self):
         result = self.tool(code="(2 / 2) * 4")
-        self.assertEqual(result, "Stdout:\n\nOutput: 4.0")
+        assert result == "Stdout:\n\nOutput: 4.0"
 
     def test_agent_type_output(self):
         inputs = ["2 * 2"]
         output = self.tool(*inputs, sanitize_inputs_outputs=True)
         output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
-        self.assertTrue(isinstance(output, output_type))
+        assert isinstance(output, output_type)
 
     def test_agent_types_inputs(self):
         inputs = ["2 * 2"]
@@ -67,7 +73,7 @@ def test_agent_types_inputs(self):
         # Should not raise an error
         output = self.tool(*inputs, sanitize_inputs_outputs=True)
         output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
-        self.assertTrue(isinstance(output, output_type))
+        assert isinstance(output, output_type)
 
     def test_imports_work(self):
         result = self.tool("import numpy as np")
@@ -87,3 +93,32 @@ def test_new_instance(self):
         assert tool is not None
         assert tool.pre_processor_class == WhisperProcessor
         assert tool.model_class == WhisperForConditionalGeneration
+
+
+@pytest.mark.parametrize(
+    "language, content_type, extract_format, query",
+    [
+        ("en", "summary", "HTML", "Python_(programming_language)"),  # English, Summary Mode, HTML format
+        ("en", "text", "WIKI", "Python_(programming_language)"),  # English, Full Text Mode, WIKI format
+        ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"),  # Spanish, Summary Mode, HTML format
+        ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"),  # Spanish, Full Text Mode, WIKI format
+    ],
+)
+def test_wikipedia_search(language, content_type, extract_format, query):
+    tool = WikipediaSearchTool(
+        user_agent="TestAgent (test@example.com)",
+        language=language,
+        content_type=content_type,
+
extract_format=extract_format,
+    )
+
+    result = tool.forward(query)
+
+    assert isinstance(result, str), "Output should be a string"
+    assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title"
+    assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL"
+
+    if content_type == "summary":
+        assert len(result.split()) < 1000, "Summary mode should return a shorter text"
+    if content_type == "text":
+        assert len(result.split()) > 1000, "Full text mode should return a longer text"
diff --git a/tests/test_e2b_executor.py b/tests/test_e2b_executor.py
deleted file mode 100644
index 5994a44be..000000000
--- a/tests/test_e2b_executor.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from unittest.mock import MagicMock, patch
-
-from smolagents.e2b_executor import E2BExecutor
-
-
-class TestE2BExecutor:
-    def test_e2b_executor_instantiation(self):
-        logger = MagicMock()
-        with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
-            mock_sandbox.return_value.commands.run.return_value.error = None
-            mock_sandbox.return_value.run_code.return_value.error = None
-            executor = E2BExecutor(additional_imports=[], tools=[], logger=logger)
-            assert isinstance(executor, E2BExecutor)
-            assert executor.logger == logger
-            assert executor.final_answer is False
-            assert executor.custom_tools == {}
-            assert executor.final_answer_pattern.pattern == r"final_answer\((.*?)\)"
-            assert executor.sbx == mock_sandbox.return_value
diff --git a/tests/test_final_answer.py b/tests/test_final_answer.py
index fcfb02a3f..b960e2fb1 100644
--- a/tests/test_final_answer.py
+++ b/tests/test_final_answer.py
@@ -13,47 +13,44 @@
-import unittest -from pathlib import Path import numpy as np -from PIL import Image -from transformers import is_torch_available -from transformers.testing_utils import get_tests_dir, require_torch +import PIL.Image +import pytest from smolagents.agent_types import _AGENT_TYPE_MAPPING from smolagents.default_tools import FinalAnswerTool from .test_tools import ToolTesterMixin +from .utils.markers import require_torch -if is_torch_available(): - import torch - - -class FinalAnswerToolTester(unittest.TestCase, ToolTesterMixin): - def setUp(self): +class TestFinalAnswerTool(ToolTesterMixin): + def setup_method(self): self.inputs = {"answer": "Final answer"} self.tool = FinalAnswerTool() def test_exact_match_arg(self): result = self.tool("Final answer") - self.assertEqual(result, "Final answer") + assert result == "Final answer" def test_exact_match_kwarg(self): result = self.tool(answer=self.inputs["answer"]) - self.assertEqual(result, "Final answer") - - def create_inputs(self): - inputs_text = {"answer": "Text input"} - inputs_image = {"answer": Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png").resize((512, 512))} - inputs_audio = {"answer": torch.Tensor(np.ones(3000))} - return {"string": inputs_text, "image": inputs_image, "audio": inputs_audio} + assert result == "Final answer" @require_torch - def test_agent_type_output(self): - inputs = self.create_inputs() + def test_agent_type_output(self, inputs): for input_type, input in inputs.items(): output = self.tool(**input, sanitize_inputs_outputs=True) agent_type = _AGENT_TYPE_MAPPING[input_type] - self.assertTrue(isinstance(output, agent_type)) + assert isinstance(output, agent_type) + + @pytest.fixture + def inputs(self, shared_datadir): + import torch + + return { + "string": {"answer": "Text input"}, + "image": {"answer": PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))}, + "audio": {"answer": torch.Tensor(np.ones(3000))}, + } diff --git 
a/tests/test_function_type_hints_utils.py b/tests/test_function_type_hints_utils.py index 3379237c6..fdb55f200 100644 --- a/tests/test_function_type_hints_utils.py +++ b/tests/test_function_type_hints_utils.py @@ -12,17 +12,234 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import unittest -from typing import List, Optional, Tuple +from typing import Any import pytest -from smolagents._function_type_hints_utils import get_imports, get_json_schema +from smolagents._function_type_hints_utils import DocstringParsingException, get_imports, get_json_schema -class TestJsonSchema(unittest.TestCase): - def test_get_json_schema(self): - def fn(x: int, y: Optional[Tuple[str, str, float]] = None) -> None: +@pytest.fixture +def valid_func(): + """A well-formed function with docstring, type hints, and return block.""" + + def multiply(x: int, y: float) -> float: + """ + Multiplies two numbers. + + Args: + x: The first number. + y: The second number. + Returns: + Product of x and y. + """ + return x * y + + return multiply + + +@pytest.fixture +def no_docstring_func(): + """Function with no docstring.""" + + def sample(x: int): + return x + + return sample + + +@pytest.fixture +def missing_arg_doc_func(): + """Function with docstring but missing an argument description.""" + + def add(x: int, y: int): + """ + Adds two numbers. + + Args: + x: The first number. + """ + return x + y + + return add + + +@pytest.fixture +def bad_return_func(): + """Function docstring with missing return description (allowed).""" + + def do_nothing(x: str | None = None): + """ + Does nothing. + + Args: + x: Some optional string. + """ + pass + + return do_nothing + + +@pytest.fixture +def complex_types_func(): + def process_data(items: list[str], config: dict[str, float], point: tuple[int, int]) -> dict: + """ + Process some data. 
+ + Args: + items: List of items to process. + config: Configuration parameters. + point: A position as (x,y). + + Returns: + Processed data result. + """ + return {"result": True} + + return process_data + + +@pytest.fixture +def optional_types_func(): + def process_with_optional(required_arg: str, optional_arg: int | None = None) -> str: + """ + Process with optional argument. + + Args: + required_arg: A required string argument. + optional_arg: An optional integer argument. + + Returns: + Processing result. + """ + return "processed" + + return process_with_optional + + +@pytest.fixture +def enum_choices_func(): + def select_color(color: str) -> str: + """ + Select a color. + + Args: + color: The color to select (choices: ["red", "green", "blue"]) + + Returns: + Selected color. + """ + return color + + return select_color + + +@pytest.fixture +def union_types_func(): + def process_union(value: int | str) -> bool | str: + """ + Process a value that can be either int or string. + + Args: + value: An integer or string value. + + Returns: + Processing result. + """ + return True if isinstance(value, int) else "string result" + + return process_union + + +@pytest.fixture +def nested_types_func(): + def process_nested_data(data: list[dict[str, Any]]) -> list[str]: + """ + Process nested data structure. + + Args: + data: List of dictionaries to process. + + Returns: + List of processed results. + """ + return ["result"] + + return process_nested_data + + +@pytest.fixture +def typed_docstring_func(): + def calculate(x: int, y: float) -> float: + """ + Calculate something. + + Args: + x (int): An integer parameter with type in docstring. + y (float): A float parameter with type in docstring. + + Returns: + float: The calculated result. + """ + return x * y + + return calculate + + +@pytest.fixture +def mismatched_types_func(): + def convert(value: int) -> str: + """ + Convert a value. + + Args: + value (str): A string value (type mismatch with hint). 
+ + Returns: + int: Converted value (type mismatch with hint). + """ + return str(value) + + return convert + + +@pytest.fixture +def complex_docstring_types_func(): + def process(data: dict[str, list[int]]) -> list[dict[str, Any]]: + """ + Process complex data. + + Args: + data (Dict[str, List[int]]): Nested structure with types. + + Returns: + List[Dict[str, Any]]: Processed results with types. + """ + return [{"result": sum(v) for k, v in data.items()}] + + return process + + +@pytest.fixture +def keywords_in_description_func(): + def process(value: str) -> str: + """ + Function with Args: or Returns: keywords in its description. + + Args: + value: A string value. + + Returns: + str: Processed value. + """ + return value.upper() + + return process + + +class TestGetJsonSchema: + def test_get_json_schema_example(self): + def fn(x: int, y: tuple[str, str, float] | None = None) -> None: """ Test function Args: @@ -50,10 +267,189 @@ def fn(x: int, y: Optional[Tuple[str, str, float]] = None) -> None: }, "return": {"type": "null"}, } - self.assertEqual( - schema["function"]["parameters"]["properties"]["y"], expected_schema["parameters"]["properties"]["y"] + assert schema["function"]["parameters"]["properties"]["y"] == expected_schema["parameters"]["properties"]["y"] + assert schema["function"] == expected_schema + + @pytest.mark.parametrize( + "fixture_name,should_fail", + [ + ("valid_func", False), + # ('no_docstring_func', True), + # ('missing_arg_doc_func', True), + ("bad_return_func", False), + ], + ) + def test_get_json_schema(self, request, fixture_name, should_fail): + func = request.getfixturevalue(fixture_name) + schema = get_json_schema(func) + assert schema["type"] == "function" + assert "function" in schema + assert "parameters" in schema["function"] + + @pytest.mark.parametrize( + "fixture_name,should_fail", + [ + # ('valid_func', False), + ("no_docstring_func", True), + ("missing_arg_doc_func", True), + # ('bad_return_func', False), + ], + ) + def 
test_get_json_schema_raises(self, request, fixture_name, should_fail): + func = request.getfixturevalue(fixture_name) + with pytest.raises(DocstringParsingException): + get_json_schema(func) + + @pytest.mark.parametrize( + "fixture_name,expected_properties", + [ + ("valid_func", {"x": "integer", "y": "number"}), + ("bad_return_func", {"x": "string"}), + ], + ) + def test_property_types(self, request, fixture_name, expected_properties): + """Test that property types are correctly mapped.""" + func = request.getfixturevalue(fixture_name) + schema = get_json_schema(func) + + properties = schema["function"]["parameters"]["properties"] + for prop_name, expected_type in expected_properties.items(): + assert properties[prop_name]["type"] == expected_type + + def test_schema_basic_structure(self, valid_func): + """Test that basic schema structure is correct.""" + schema = get_json_schema(valid_func) + # Check schema type + assert schema["type"] == "function" + assert "function" in schema + # Check function schema + function_schema = schema["function"] + assert function_schema["name"] == "multiply" + assert "description" in function_schema + assert function_schema["description"] == "Multiplies two numbers." + # Check parameters schema + assert "parameters" in function_schema + params = function_schema["parameters"] + assert params["type"] == "object" + assert "properties" in params + assert "required" in params + assert set(params["required"]) == {"x", "y"} + properties = params["properties"] + assert properties["x"]["type"] == "integer" + assert properties["y"]["type"] == "number" + # Check return schema + assert "return" in function_schema + return_schema = function_schema["return"] + assert return_schema["type"] == "number" + assert return_schema["description"] == "Product of x and y." 
+ + def test_complex_types(self, complex_types_func): + """Test schema generation for complex types.""" + schema = get_json_schema(complex_types_func) + properties = schema["function"]["parameters"]["properties"] + # Check list type + assert properties["items"]["type"] == "array" + # Check dict type + assert properties["config"]["type"] == "object" + # Check tuple type + assert properties["point"]["type"] == "array" + assert len(properties["point"]["prefixItems"]) == 2 + assert properties["point"]["prefixItems"][0]["type"] == "integer" + assert properties["point"]["prefixItems"][1]["type"] == "integer" + + def test_optional_types(self, optional_types_func): + """Test schema generation for optional arguments.""" + schema = get_json_schema(optional_types_func) + params = schema["function"]["parameters"] + # Required argument should be in required list + assert "required_arg" in params["required"] + # Optional argument should not be in required list + assert "optional_arg" not in params["required"] + # Optional argument should be nullable + assert params["properties"]["optional_arg"]["nullable"] is True + assert params["properties"]["optional_arg"]["type"] == "integer" + + def test_enum_choices(self, enum_choices_func): + """Test schema generation for enum choices in docstring.""" + schema = get_json_schema(enum_choices_func) + color_prop = schema["function"]["parameters"]["properties"]["color"] + assert "enum" in color_prop + assert color_prop["enum"] == ["red", "green", "blue"] + + def test_union_types(self, union_types_func): + """Test schema generation for union types.""" + schema = get_json_schema(union_types_func) + value_prop = schema["function"]["parameters"]["properties"]["value"] + return_prop = schema["function"]["return"] + # Check union in parameter + assert len(value_prop["type"]) == 2 + # Check union in return type + assert len(return_prop["type"]) == 2 + + def test_nested_types(self, nested_types_func): + """Test schema generation for nested complex 
types.""" + schema = get_json_schema(nested_types_func) + data_prop = schema["function"]["parameters"]["properties"]["data"] + assert data_prop["type"] == "array" + + def test_typed_docstring_parsing(self, typed_docstring_func): + """Test parsing of docstrings with type annotations.""" + schema = get_json_schema(typed_docstring_func) + # Type hints should take precedence over docstring types + assert schema["function"]["parameters"]["properties"]["x"]["type"] == "integer" + assert schema["function"]["parameters"]["properties"]["y"]["type"] == "number" + # Description should be extracted correctly + assert ( + schema["function"]["parameters"]["properties"]["x"]["description"] + == "An integer parameter with type in docstring." ) - self.assertEqual(schema["function"], expected_schema) + assert ( + schema["function"]["parameters"]["properties"]["y"]["description"] + == "A float parameter with type in docstring." + ) + # Return type and description should be correct + assert schema["function"]["return"]["type"] == "number" + assert schema["function"]["return"]["description"] == "The calculated result." 
+ + def test_mismatched_docstring_types(self, mismatched_types_func): + """Test that type hints take precedence over docstring types when they conflict.""" + schema = get_json_schema(mismatched_types_func) + # Type hints should take precedence over docstring types + assert schema["function"]["parameters"]["properties"]["value"]["type"] == "integer" + # Return type from type hint should be used, not docstring + assert schema["function"]["return"]["type"] == "string" + + def test_complex_docstring_types(self, complex_docstring_types_func): + """Test parsing of complex type annotations in docstrings.""" + schema = get_json_schema(complex_docstring_types_func) + # Check that complex nested type is parsed correctly from type hints + data_prop = schema["function"]["parameters"]["properties"]["data"] + assert data_prop["type"] == "object" + # Check return type + return_prop = schema["function"]["return"] + assert return_prop["type"] == "array" + # Description should include the type information from docstring + assert data_prop["description"] == "Nested structure with types." + assert return_prop["description"] == "Processed results with types." 
+ + @pytest.mark.parametrize( + "fixture_name,expected_description", + [ + ("typed_docstring_func", "An integer parameter with type in docstring."), + ("complex_docstring_types_func", "Nested structure with types."), + ], + ) + def test_type_in_description_handling(self, request, fixture_name, expected_description): + """Test that type information in docstrings is preserved in description.""" + func = request.getfixturevalue(fixture_name) + schema = get_json_schema(func) + # First parameter description should contain the expected text + first_param_name = list(schema["function"]["parameters"]["properties"].keys())[0] + assert schema["function"]["parameters"]["properties"][first_param_name]["description"] == expected_description + + def test_with_special_words_in_description_func(self, keywords_in_description_func): + schema = get_json_schema(keywords_in_description_func) + assert schema["function"]["description"] == "Function with Args: or Returns: keywords in its description." class TestGetCode: @@ -114,5 +510,5 @@ class TestGetCode: ), ], ) - def test_get_imports(self, code: str, expected: List[str]): + def test_get_imports(self, code: str, expected: list[str]): assert sorted(get_imports(code)) == sorted(expected) diff --git a/tests/test_import.py b/tests/test_import.py index aaa284d39..c977de8d2 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -1,11 +1,27 @@ +import os import subprocess +import tempfile -def test_import_smolagents_without_extras(): - # Run the import statement in an isolated virtual environment - result = subprocess.run( - ["uv", "run", "--isolated", "--no-editable", "-"], input="import smolagents", text=True, capture_output=True - ) +def test_import_smolagents_without_extras(monkeypatch): + monkeypatch.delenv("VIRTUAL_ENV", raising=False) + with tempfile.TemporaryDirectory() as temp_dir: + # Create a virtual environment + venv_dir = os.path.join(temp_dir, "venv") + subprocess.run(["uv", "venv", venv_dir], check=True) + + # 
Install smolagents in the virtual environment + subprocess.run( + ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True + ) + + # Run the import test in the virtual environment + result = subprocess.run( + [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"], + capture_output=True, + text=True, + ) + # Check if the import was successful assert result.returncode == 0, ( "Import failed with error: " diff --git a/tests/test_local_python_executor.py b/tests/test_local_python_executor.py index 29e1ec94c..f7d43c282 100644 --- a/tests/test_local_python_executor.py +++ b/tests/test_local_python_executor.py @@ -16,20 +16,27 @@ import ast import types import unittest +from contextlib import nullcontext as does_not_raise from textwrap import dedent +from unittest.mock import patch import numpy as np import pandas as pd import pytest -from smolagents.default_tools import BASE_PYTHON_TOOLS +from smolagents.default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool from smolagents.local_python_executor import ( + DANGEROUS_FUNCTIONS, + DANGEROUS_MODULES, InterpreterError, + LocalPythonExecutor, PrintContainer, - check_module_authorized, + check_import_authorized, + evaluate_boolop, evaluate_condition, evaluate_delete, evaluate_python_code, + evaluate_subscript, fix_final_answer_code, get_safe_module, ) @@ -52,14 +59,14 @@ def test_evaluate_assign(self): state = {} result, _ = evaluate_python_code(code, {}, state=state) assert result == 3 - self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": 2}) + self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}}) code = "x = y" state = {"y": 5} result, _ = evaluate_python_code(code, {}, state=state) # evaluate returns the value of the last assignment. 
assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 5, "y": 5, "_operations_count": 2}) + self.assertDictEqualNoPrint(state, {"x": 5, "y": 5, "_operations_count": {"counter": 2}}) code = "a=1;b=None" result, _ = evaluate_python_code(code, {}, state={}) @@ -85,26 +92,46 @@ def test_evaluate_call(self): state = {"x": 3} result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state) assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": 3}) + self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 3}}) # Should not work without the tool - with pytest.raises(InterpreterError) as e: + with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'add_two'"): evaluate_python_code(code, {}, state=state) - assert "tried to execute add_two" in str(e.value) + + def test_evaluate_class_def(self): + code = dedent('''\ + class MyClass: + """A class with a value.""" + + def __init__(self, value): + self.value = value + + def get_value(self): + return self.value + + instance = MyClass(42) + result = instance.get_value() + ''') + state = {} + result, _ = evaluate_python_code(code, {}, state=state) + assert result == 42 + assert state["instance"].__doc__ == "A class with a value." 
def test_evaluate_constant(self): code = "x = 3" state = {} result, _ = evaluate_python_code(code, {}, state=state) assert result == 3 - self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": 2}) + self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}}) def test_evaluate_dict(self): code = "test_dict = {'x': x, 'y': add_two(x)}" state = {"x": 3} result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state) self.assertDictEqual(result, {"x": 3, "y": 5}) - self.assertDictEqualNoPrint(state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": 7}) + self.assertDictEqualNoPrint( + state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 7}} + ) def test_evaluate_expression(self): code = "x = 3\ny = 5" @@ -112,7 +139,7 @@ def test_evaluate_expression(self): result, _ = evaluate_python_code(code, {}, state=state) # evaluate returns the value of the last assignment. assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": 4}) + self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 4}}) def test_evaluate_f_string(self): code = "text = f'This is x: {x}.'" @@ -120,14 +147,16 @@ def test_evaluate_f_string(self): result, _ = evaluate_python_code(code, {}, state=state) # evaluate returns the value of the last assignment. assert result == "This is x: 3." - self.assertDictEqualNoPrint(state, {"x": 3, "text": "This is x: 3.", "_operations_count": 6}) + self.assertDictEqualNoPrint(state, {"x": 3, "text": "This is x: 3.", "_operations_count": {"counter": 6}}) def test_evaluate_f_string_with_format(self): code = "text = f'This is x: {x:.2f}.'" state = {"x": 3.336} result, _ = evaluate_python_code(code, {}, state=state) assert result == "This is x: 3.34." 
- self.assertDictEqualNoPrint(state, {"x": 3.336, "text": "This is x: 3.34.", "_operations_count": 8}) + self.assertDictEqualNoPrint( + state, {"x": 3.336, "text": "This is x: 3.34.", "_operations_count": {"counter": 8}} + ) def test_evaluate_f_string_with_complex_format(self): code = "text = f'This is x: {x:>{width}.{precision}f}.'" @@ -135,7 +164,14 @@ def test_evaluate_f_string_with_complex_format(self): result, _ = evaluate_python_code(code, {}, state=state) assert result == "This is x: 3.34." self.assertDictEqualNoPrint( - state, {"x": 3.336, "width": 10, "precision": 2, "text": "This is x: 3.34.", "_operations_count": 14} + state, + { + "x": 3.336, + "width": 10, + "precision": 2, + "text": "This is x: 3.34.", + "_operations_count": {"counter": 14}, + }, ) def test_evaluate_if(self): @@ -144,40 +180,42 @@ def test_evaluate_if(self): result, _ = evaluate_python_code(code, {}, state=state) # evaluate returns the value of the last assignment. assert result == 2 - self.assertDictEqualNoPrint(state, {"x": 3, "y": 2, "_operations_count": 6}) + self.assertDictEqualNoPrint(state, {"x": 3, "y": 2, "_operations_count": {"counter": 6}}) state = {"x": 8} result, _ = evaluate_python_code(code, {}, state=state) # evaluate returns the value of the last assignment. 
assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 8, "y": 5, "_operations_count": 6}) + self.assertDictEqualNoPrint(state, {"x": 8, "y": 5, "_operations_count": {"counter": 6}}) def test_evaluate_list(self): code = "test_list = [x, add_two(x)]" state = {"x": 3} result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state) self.assertListEqual(result, [3, 5]) - self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": 5}) + self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 5}}) def test_evaluate_name(self): code = "y = x" state = {"x": 3} result, _ = evaluate_python_code(code, {}, state=state) assert result == 3 - self.assertDictEqualNoPrint(state, {"x": 3, "y": 3, "_operations_count": 2}) + self.assertDictEqualNoPrint(state, {"x": 3, "y": 3, "_operations_count": {"counter": 2}}) def test_evaluate_subscript(self): code = "test_list = [x, add_two(x)]\ntest_list[1]" state = {"x": 3} result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state) assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": 9}) + self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 9}}) code = "test_dict = {'x': x, 'y': add_two(x)}\ntest_dict['y']" state = {"x": 3} result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state) assert result == 5 - self.assertDictEqualNoPrint(state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": 11}) + self.assertDictEqualNoPrint( + state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 11}} + ) code = "vendor = {'revenue': 31000, 'rent': 50312}; vendor['ratio'] = round(vendor['revenue'] / vendor['rent'], 2)" state = {} @@ -201,14 +239,14 @@ def test_evaluate_for(self): state = {} result, _ = evaluate_python_code(code, {"range": range}, state=state) assert result == 2 - self.assertDictEqualNoPrint(state, 
{"x": 2, "i": 2, "_operations_count": 11}) + self.assertDictEqualNoPrint(state, {"x": 2, "i": 2, "_operations_count": {"counter": 11}}) def test_evaluate_binop(self): code = "y + x" state = {"x": 3, "y": 6} result, _ = evaluate_python_code(code, {}, state=state) assert result == 9 - self.assertDictEqualNoPrint(state, {"x": 3, "y": 6, "_operations_count": 4}) + self.assertDictEqualNoPrint(state, {"x": 3, "y": 6, "_operations_count": {"counter": 4}}) def test_recursive_function(self): code = """ @@ -221,6 +259,38 @@ def recur_fibo(n): result, _ = evaluate_python_code(code, {}, state={}) assert result == 8 + def test_max_operations(self): + # Check that operation counter is not reset in functions + code = dedent( + """ + def func(a): + for j in range(10): + a += j + return a + + for i in range(5): + func(i) + """ + ) + with patch("smolagents.local_python_executor.MAX_OPERATIONS", 100): + with pytest.raises(InterpreterError) as exception_info: + evaluate_python_code(code, {"range": range}, state={}) + assert "Reached the max number of operations" in str(exception_info.value) + + def test_operations_count(self): + # Check that operation counter is not reset in functions + code = dedent( + """ + def func(): + return 0 + + func() + """ + ) + state = {} + evaluate_python_code(code, {"range": range}, state=state) + assert state["_operations_count"]["counter"] == 5 + def test_evaluate_string_methods(self): code = "'hello'.replace('h', 'o').split('e')" result, _ = evaluate_python_code(code, {}, state={}) @@ -232,9 +302,12 @@ def test_evaluate_slicing(self): assert result == "le" def test_access_attributes(self): - code = "integer = 1\nobj_class = integer.__class__\nobj_class" - result, _ = evaluate_python_code(code, {}, state={}) - assert result is int + class A: + attr = 2 + + code = "A.attr" + result, _ = evaluate_python_code(code, {}, state={"A": A}) + assert result == 2 def test_list_comprehension(self): code = "sentence = 'THESEAGULL43'\nmeaningful_sentence = 
'-'.join([char.lower() for char in sentence if char.isalpha()])" @@ -312,6 +385,11 @@ def test_listcomp(self): result, _ = evaluate_python_code(code, {"range": range}, state={}) assert result == [0, 1, 2] + def test_setcomp(self): + code = "batman_times = {entry['time'] for entry in [{'time': 10}, {'time': 19}, {'time': 20}]}" + result, _ = evaluate_python_code(code, {}, state={}) + assert result == {10, 19, 20} + def test_break_continue(self): code = "for i in range(10):\n if i == 5:\n break\ni" result, _ = evaluate_python_code(code, {"range": range}, state={}) @@ -359,17 +437,19 @@ def test_while(self): # test infinite loop code = "i = 0\nwhile i < 3:\n i -= 1\ni" - with pytest.raises(InterpreterError) as e: - evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}) - assert "iterations in While loop exceeded" in str(e) + with patch("smolagents.local_python_executor.MAX_WHILE_ITERATIONS", 100): + with pytest.raises(InterpreterError, match=".*Maximum number of 100 iterations in While loop exceeded"): + evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}) # test lazy evaluation - code = """ -house_positions = [0, 7, 10, 15, 18, 22, 22] -i, n, loc = 0, 7, 30 -while i < n and house_positions[i] <= loc: - i += 1 -""" + code = dedent( + """ + house_positions = [0, 7, 10, 15, 18, 22, 22] + i, n, loc = 0, 7, 30 + while i < n and house_positions[i] <= loc: + i += 1 + """ + ) state = {} evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state) @@ -399,6 +479,22 @@ def test_boolops(self): result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}) assert result == "Sacramento" + # Short-circuit evaluation: + # (T and 0) or (T and T) => 0 or True => True + code = "result = (x > 3 and y) or (z == 10 and not y)\nresult" + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"x": 5, "y": 0, "z": 10}) + assert result + + # (None or "") or "Found" => "" or "Found" => "Found" + code = "result = (a or c) or b\nresult" + 
result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": None, "b": "Found", "c": ""}) + assert result == "Found" + + # ("First" and "") or "Third" => "" or "Third" -> "Third" + code = "result = (a and b) or c\nresult" + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": "First", "b": "", "c": "Third"}) + assert result == "Third" + def test_if_conditions(self): code = """char='a' if char.isalpha(): @@ -446,22 +542,35 @@ def test_imports(self): # Test submodules are handled properly, thus not raising error code = "import numpy.random as rd\nrng = rd.default_rng(12345)\nrng.random()" - result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"]) + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"]) code = "from numpy.random import default_rng as d_rng\nrng = d_rng(12345)\nrng.random()" - result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy"]) + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"]) def test_additional_imports(self): code = "import numpy as np" evaluate_python_code(code, authorized_imports=["numpy"], state={}) + # Test that allowing 'numpy.*' allows numpy root package and its submodules + code = "import numpy as np\nnp.random.default_rng(123)\nnp.array([1, 2])" + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"]) + + # Test that allowing 'numpy.*' allows importing a submodule + code = "import numpy.random as rd\nrd.default_rng(12345)" + result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"]) + code = "import numpy.random as rd" evaluate_python_code(code, authorized_imports=["numpy.random"], state={}) - evaluate_python_code(code, authorized_imports=["numpy"], state={}) + evaluate_python_code(code, authorized_imports=["numpy.*"], state={}) 
evaluate_python_code(code, authorized_imports=["*"], state={}) with pytest.raises(InterpreterError): evaluate_python_code(code, authorized_imports=["random"], state={}) + with pytest.raises(InterpreterError): + evaluate_python_code(code, authorized_imports=["numpy.a"], state={}) + with pytest.raises(InterpreterError): + evaluate_python_code(code, authorized_imports=["numpy.a.*"], state={}) + def test_multiple_comparators(self): code = "0 <= -1 < 4 and 0 <= -5 < 4" result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}) @@ -937,22 +1046,6 @@ def test_fix_final_answer_code(self): Got: {result} """ - def test_dangerous_subpackage_access_blocked(self): - # Direct imports with dangerous patterns should fail - code = "import random._os" - with pytest.raises(InterpreterError): - evaluate_python_code(code) - - # Import of whitelisted modules should succeed but dangerous submodules should not exist - code = "import random;random._os.system('echo bad command passed')" - with pytest.raises(InterpreterError) as e: - evaluate_python_code(code) - assert "AttributeError: module 'random' has no attribute '_os'" in str(e) - - code = "import doctest;doctest.inspect.os.system('echo bad command passed')" - with pytest.raises(InterpreterError): - evaluate_python_code(code, authorized_imports=["doctest"]) - def test_close_matches_subscript(self): code = 'capitals = {"Czech Republic": "Prague", "Monaco": "Monaco", "Bhutan": "Thimphu"};capitals["Butan"]' with pytest.raises(Exception) as e: @@ -973,21 +1066,18 @@ def test_dangerous_builtins_calls_are_blocked(self): with pytest.raises(InterpreterError): evaluate_python_code(dangerous_code, static_tools=BASE_PYTHON_TOOLS) - def test_dangerous_builtins_are_callable_if_explicitly_added(self): - dangerous_code = """ -compile = callable.__self__.compile -eval = callable.__self__.eval -exec = callable.__self__.exec - -eval("1 + 1") -exec(compile("1 + 1", "no filename", "exec")) - -teval("1 + 1") -texec(tcompile("1 + 1", "no filename", 
"exec")) - """ + def test_final_answer_accepts_kwarg_answer(self): + code = "final_answer(answer=2)" + result, _ = evaluate_python_code(code, {"final_answer": (lambda x: 2 * x)}, state={}) + assert result == 4 + def test_dangerous_builtins_are_callable_if_explicitly_added(self): + dangerous_code = dedent(""" + eval("1 + 1") + exec(compile("1 + 1", "no filename", "exec")) + """) evaluate_python_code( - dangerous_code, static_tools={"tcompile": compile, "teval": eval, "texec": exec} | BASE_PYTHON_TOOLS + dangerous_code, static_tools={"compile": compile, "eval": eval, "exec": exec} | BASE_PYTHON_TOOLS ) def test_can_import_os_if_explicitly_authorized(self): @@ -998,6 +1088,64 @@ def test_can_import_os_if_all_imports_authorized(self): dangerous_code = "import os; os.listdir('./')" evaluate_python_code(dangerous_code, authorized_imports=["*"]) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + def test_can_import_scipy_if_explicitly_authorized(self): + code = "import scipy" + evaluate_python_code(code, authorized_imports=["scipy"]) + + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + def test_can_import_sklearn_if_explicitly_authorized(self): + code = "import sklearn" + evaluate_python_code(code, authorized_imports=["sklearn"]) + + def test_function_def_recovers_source_code(self): + executor = LocalPythonExecutor([]) + + executor.send_tools({"final_answer": FinalAnswerTool()}) + + res, _, _ = executor( + dedent( + """ + def target_function(): + return "Hello world" + + final_answer(target_function) + """ + ) + ) + assert res.__name__ == "target_function" + assert res.__source__ == "def target_function():\n return 'Hello world'" + + +def test_evaluate_annassign(): + code = dedent("""\ + # Basic annotated assignment + x: int = 42 + + # Type annotations with expressions + y: float = x / 2 + + # Type annotation without assignment + z: list + + # Type annotation with complex value + names: list = ["Alice", "Bob", "Charlie"] + + # Type hint shouldn't 
restrict values at runtime + s: str = 123 # Would be a type error in static checking, but valid at runtime + + # Access the values + result = (x, y, names, s) + """) + state = {} + evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state) + assert state["x"] == 42 + assert state["y"] == 21.0 + assert "z" not in state # z should be not be defined + assert state["names"] == ["Alice", "Bob", "Charlie"] + assert state["s"] == 123 # Type hints don't restrict at runtime + assert state["result"] == (42, 21.0, ["Alice", "Bob", "Charlie"], 123) + @pytest.mark.parametrize( "code, expected_result", @@ -1132,7 +1280,7 @@ def __{operator_name}__(self, other): del x[2] x[2] """), - "Index 2 out of bounds for list of length 2", + "IndexError: list index out of range", ), ( dedent("""\ @@ -1157,6 +1305,26 @@ def test_evaluate_python_code_with_evaluate_delete(code, expected_error_message) assert expected_error_message in str(exception_info.value) +@pytest.mark.parametrize("a", [1, 0]) +@pytest.mark.parametrize("b", [2, 0]) +@pytest.mark.parametrize("c", [3, 0]) +def test_evaluate_boolop_and(a, b, c): + boolop_ast = ast.parse("a and b and c").body[0].value + state = {"a": a, "b": b, "c": c} + result = evaluate_boolop(boolop_ast, state, {}, {}, []) + assert result == (a and b and c) + + +@pytest.mark.parametrize("a", [1, 0]) +@pytest.mark.parametrize("b", [2, 0]) +@pytest.mark.parametrize("c", [3, 0]) +def test_evaluate_boolop_or(a, b, c): + boolop_ast = ast.parse("a or b or c").body[0].value + state = {"a": a, "b": b, "c": c} + result = evaluate_boolop(boolop_ast, state, {}, {}, []) + assert result == (a or b or c) + + @pytest.mark.parametrize( "code, state, expectation", [ @@ -1303,6 +1471,123 @@ def test_evaluate_condition_with_pandas_exceptions(condition, state, expected_ex assert str(expected_exception) in str(exception_info.value) +@pytest.mark.parametrize( + "subscript, state, expected_result", + [ + ("dct[1]", {"dct": {1: 11, 2: 22}}, 11), + ("dct[2]", {"dct": {1: "a", 2: 
"b"}}, "b"), + ("dct['b']", {"dct": {"a": 1, "b": 2}}, 2), + ("dct['a']", {"dct": {"a": "aa", "b": "bb"}}, "aa"), + ("dct[1, 2]", {"dct": {(1, 2): 3}}, 3), # tuple-index + ("dct['a']['b']", {"dct": {"a": {"b": 1}}}, 1), # nested + ("lst[0]", {"lst": [1, 2, 3]}, 1), + ("lst[-1]", {"lst": [1, 2, 3]}, 3), + ("lst[1:3]", {"lst": [1, 2, 3, 4]}, [2, 3]), + ("lst[:]", {"lst": [1, 2, 3]}, [1, 2, 3]), + ("lst[::2]", {"lst": [1, 2, 3, 4]}, [1, 3]), + ("lst[::-1]", {"lst": [1, 2, 3]}, [3, 2, 1]), + ("tup[1]", {"tup": (1, 2, 3)}, 2), + ("tup[-1]", {"tup": (1, 2, 3)}, 3), + ("tup[1:3]", {"tup": (1, 2, 3, 4)}, (2, 3)), + ("tup[:]", {"tup": (1, 2, 3)}, (1, 2, 3)), + ("tup[::2]", {"tup": (1, 2, 3, 4)}, (1, 3)), + ("tup[::-1]", {"tup": (1, 2, 3)}, (3, 2, 1)), + ("st[1]", {"str": "abc"}, "b"), + ("st[-1]", {"str": "abc"}, "c"), + ("st[1:3]", {"str": "abcd"}, "bc"), + ("st[:]", {"str": "abc"}, "abc"), + ("st[::2]", {"str": "abcd"}, "ac"), + ("st[::-1]", {"str": "abc"}, "cba"), + ("arr[1]", {"arr": np.array([1, 2, 3])}, 2), + ("arr[1:3]", {"arr": np.array([1, 2, 3, 4])}, np.array([2, 3])), + ("arr[:]", {"arr": np.array([1, 2, 3])}, np.array([1, 2, 3])), + ("arr[::2]", {"arr": np.array([1, 2, 3, 4])}, np.array([1, 3])), + ("arr[::-1]", {"arr": np.array([1, 2, 3])}, np.array([3, 2, 1])), + ("arr[1, 2]", {"arr": np.array([[1, 2, 3], [4, 5, 6]])}, 6), + ("ser[1]", {"ser": pd.Series([1, 2, 3])}, 2), + ("ser.loc[1]", {"ser": pd.Series([1, 2, 3])}, 2), + ("ser.loc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3), + ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3])}, 2), + ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2), + ("ser.at[1]", {"ser": pd.Series([1, 2, 3])}, 2), + ("ser.at[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3), + ("ser.iat[1]", {"ser": pd.Series([1, 2, 3])}, 2), + ("ser.iat[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2), + ("ser[1:3]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([2, 3], index=[1, 2])), + ("ser[:]", {"ser": pd.Series([1, 
2, 3])}, pd.Series([1, 2, 3])), + ("ser[::2]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([1, 3], index=[0, 2])), + ("ser[::-1]", {"ser": pd.Series([1, 2, 3])}, pd.Series([3, 2, 1], index=[2, 1, 0])), + ("df['y'][1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4), + ("df['y'][5]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3), + ("df.loc[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4), + ("df.loc[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3), + ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4), + ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4), + ("df.at[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4), + ("df.at[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3), + ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4), + ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4), + ], +) +def test_evaluate_subscript(subscript, state, expected_result): + subscript_ast = ast.parse(subscript).body[0].value + result = evaluate_subscript(subscript_ast, state, {}, {}, []) + try: + assert result == expected_result + except ValueError: + assert (result == expected_result).all() + + +@pytest.mark.parametrize( + "subscript, state, expected_error_message", + [ + ("dct['a']", {"dct": {}}, "KeyError: 'a'"), + ("dct[0]", {"dct": {}}, "KeyError: 0"), + ("dct['c']", {"dct": {"a": 1, "b": 2}}, "KeyError: 'c'"), + ("dct[1, 2, 3]", {"dct": {(1, 2): 3}}, "KeyError: (1, 2, 3)"), + ("lst[0]", {"lst": []}, "IndexError: list index out of range"), + ("lst[3]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"), + ("lst[-4]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"), + ("value[0]", {"value": 1}, "TypeError: 'int' object is not subscriptable"), + ], +) +def test_evaluate_subscript_error(subscript, state, expected_error_message): + subscript_ast = 
ast.parse(subscript).body[0].value + with pytest.raises(InterpreterError, match="Could not index") as exception_info: + _ = evaluate_subscript(subscript_ast, state, {}, {}, []) + assert expected_error_message in str(exception_info.value) + + +@pytest.mark.parametrize( + "subscriptable_class, expectation", + [ + (True, 20), + (False, InterpreterError("TypeError: 'Custom' object is not subscriptable")), + ], +) +def test_evaluate_subscript_with_custom_class(subscriptable_class, expectation): + if subscriptable_class: + + class Custom: + def __getitem__(self, key): + return key * 10 + else: + + class Custom: + pass + + state = {"obj": Custom()} + subscript = "obj[2]" + subscript_ast = ast.parse(subscript).body[0].value + if isinstance(expectation, Exception): + with pytest.raises(type(expectation), match="Could not index") as exception_info: + evaluate_subscript(subscript_ast, state, {}, {}, []) + assert "TypeError: 'Custom' object is not subscriptable" in str(exception_info.value) + else: + result = evaluate_subscript(subscript_ast, state, {}, {}, []) + assert result == expectation + + def test_get_safe_module_handle_lazy_imports(): class FakeModule(types.ModuleType): def __init__(self, name): @@ -1382,15 +1667,497 @@ def test_len(self): @pytest.mark.parametrize( "module,authorized_imports,expected", [ - ("os", ["*"], True), + ("os", ["other", "*"], True), ("AnyModule", ["*"], True), ("os", ["os"], True), ("AnyModule", ["AnyModule"], True), ("Module.os", ["Module"], False), - ("Module.os", ["Module", "os"], True), - ("os.path", ["os"], True), - ("os", ["os.path"], False), + ("Module.os", ["Module", "Module.os"], True), + ("os.path", ["os.*"], True), + ("os", ["os.path"], True), ], ) -def test_check_module_authorized(module: str, authorized_imports: list[str], expected: bool): - assert check_module_authorized(module, authorized_imports) == expected +def test_check_import_authorized(module: str, authorized_imports: list[str], expected: bool): + assert 
check_import_authorized(module, authorized_imports) == expected + + +class TestLocalPythonExecutor: + def test_state_name(self): + executor = LocalPythonExecutor(additional_authorized_imports=[]) + assert executor.state.get("__name__") == "__main__" + + @pytest.mark.parametrize( + "code", + [ + "d = {'func': lambda x: x + 10}; func = d['func']; func(1)", + "d = {'func': lambda x: x + 10}; d['func'](1)", + ], + ) + def test_call_from_dict(self, code): + executor = LocalPythonExecutor([]) + result, _, _ = executor(code) + assert result == 11 + + @pytest.mark.parametrize( + "code", + [ + "a = b = 1; a", + "a = b = 1; b", + "a, b = c, d = 1, 1; a", + "a, b = c, d = 1, 1; b", + "a, b = c, d = 1, 1; c", + "a, b = c, d = {1, 2}; a", + "a, b = c, d = {1, 2}; c", + "a, b = c, d = {1: 10, 2: 20}; a", + "a, b = c, d = {1: 10, 2: 20}; c", + "a = b = (lambda: 1)(); b", + "a = b = (lambda: 1)(); lambda x: 10; b", + "a = b = (lambda x: lambda y: x + y)(0)(1); b", + dedent(""" + def foo(): + return 1; + a = b = foo(); b"""), + dedent(""" + def foo(*args, **kwargs): + return sum(args) + a = b = foo(1,-1,1); b"""), + "a, b = 1, 2; a, b = b, a; b", + ], + ) + def test_chained_assignments(self, code): + executor = LocalPythonExecutor([]) + executor.send_tools({}) + result, _, _ = executor(code) + assert result == 1 + + def test_evaluate_assign_error(self): + code = "a, b = 1, 2, 3; a" + executor = LocalPythonExecutor([]) + with pytest.raises(InterpreterError, match=".*Cannot unpack tuple of wrong size"): + executor(code) + + +class TestLocalPythonExecutorSecurity: + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [([], InterpreterError("Import of os is not allowed")), (["os"], None)], + ) + def test_vulnerability_import(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + 
else does_not_raise() + ): + executor("import os") + + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)], + ) + def test_vulnerability_builtins(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor("import builtins") + + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)], + ) + def test_vulnerability_builtins_safe_functions(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor("import builtins; builtins.print(1)") + + @pytest.mark.parametrize( + "additional_authorized_imports, additional_tools, expected_error", + [ + ([], [], InterpreterError("Import of builtins is not allowed")), + (["builtins"], [], InterpreterError("Forbidden access to function: exec")), + (["builtins"], ["exec"], None), + ], + ) + def test_vulnerability_builtins_dangerous_functions( + self, additional_authorized_imports, additional_tools, expected_error + ): + executor = LocalPythonExecutor(additional_authorized_imports) + if additional_tools: + from builtins import exec + + executor.send_tools({"exec": exec}) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor("import builtins; builtins.exec") + + @pytest.mark.parametrize( + "additional_authorized_imports, additional_tools, expected_error", + [ + ([], [], InterpreterError("Import 
of os is not allowed")), + (["os"], [], InterpreterError("Forbidden access to function: popen")), + (["os"], ["popen"], None), + ], + ) + def test_vulnerability_dangerous_functions(self, additional_authorized_imports, additional_tools, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + if additional_tools: + from os import popen + + executor.send_tools({"popen": popen}) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor("import os; os.popen") + + @pytest.mark.parametrize("dangerous_function", DANGEROUS_FUNCTIONS) + def test_vulnerability_for_all_dangerous_functions(self, dangerous_function): + dangerous_module_name, dangerous_function_name = dangerous_function.rsplit(".", 1) + # Skip test if module is not installed: posix module is not installed on Windows + pytest.importorskip(dangerous_module_name) + executor = LocalPythonExecutor([dangerous_module_name]) + if "__" in dangerous_function_name: + error_match = f".*Forbidden access to dunder attribute: {dangerous_function_name}" + else: + error_match = f".*Forbidden access to function: {dangerous_function_name}.*" + with pytest.raises(InterpreterError, match=error_match): + executor(f"import {dangerous_module_name}; {dangerous_function}") + + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [ + ([], InterpreterError("Import of sys is not allowed")), + (["sys"], InterpreterError("Forbidden access to module: os")), + (["sys", "os"], None), + ], + ) + def test_vulnerability_via_sys(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor( + dedent( + """ + import sys + sys.modules["os"].system(":") + """ + ) + ) + + 
@pytest.mark.parametrize("dangerous_module", DANGEROUS_MODULES) + def test_vulnerability_via_sys_for_all_dangerous_modules(self, dangerous_module): + import sys + + if dangerous_module not in sys.modules or dangerous_module == "sys": + pytest.skip("module not present in sys.modules") + executor = LocalPythonExecutor(["sys"]) + with pytest.raises(InterpreterError) as exception_info: + executor( + dedent( + f""" + import sys + sys.modules["{dangerous_module}"] + """ + ) + ) + assert f"Forbidden access to module: {dangerous_module}" in str(exception_info.value) + + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [(["importlib"], InterpreterError("Forbidden access to module: os")), (["importlib", "os"], None)], + ) + def test_vulnerability_via_importlib(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor( + dedent( + """ + import importlib + importlib.import_module("os").system(":") + """ + ) + ) + + @pytest.mark.parametrize( + "code, additional_authorized_imports, expected_error", + [ + # os submodule + ( + "import queue; queue.threading._os.system(':')", + [], + InterpreterError("Forbidden access to module: threading"), + ), + ( + "import queue; queue.threading._os.system(':')", + ["threading"], + InterpreterError("Forbidden access to module: os"), + ), + ("import random; random._os.system(':')", [], InterpreterError("Forbidden access to module: os")), + ( + "import random; random.__dict__['_os'].system(':')", + [], + InterpreterError("Forbidden access to dunder attribute: __dict__"), + ), + ( + "import doctest; doctest.inspect.os.system(':')", + ["doctest"], + InterpreterError("Forbidden access to module: inspect"), + ), + ( + "import doctest; doctest.inspect.os.system(':')", + ["doctest", "inspect"], + 
InterpreterError("Forbidden access to module: os"), + ), + # subprocess submodule + ( + "import asyncio; asyncio.base_events.events.subprocess", + ["asyncio"], + InterpreterError("Forbidden access to module: asyncio.base_events"), + ), + ( + "import asyncio; asyncio.base_events.events.subprocess", + ["asyncio", "asyncio.base_events"], + InterpreterError("Forbidden access to module: asyncio.events"), + ), + ( + "import asyncio; asyncio.base_events.events.subprocess", + ["asyncio", "asyncio.base_events", "asyncio.base_events.events"], + InterpreterError("Forbidden access to module: asyncio.events"), + ), + # sys submodule + ( + "import queue; queue.threading._sys.modules['os'].system(':')", + [], + InterpreterError("Forbidden access to module: threading"), + ), + ( + "import queue; queue.threading._sys.modules['os'].system(':')", + ["threading"], + InterpreterError("Forbidden access to module: sys"), + ), + # Allowed + ("import pandas; pandas.io", ["pandas", "pandas.io"], None), + ], + ) + def test_vulnerability_via_submodules(self, code, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor(code) + + @pytest.mark.parametrize( + "additional_authorized_imports, additional_tools, expected_error", + [ + ([], [], InterpreterError("Import of sys is not allowed")), + (["sys"], [], InterpreterError("Forbidden access to module: builtins")), + ( + ["sys", "builtins"], + [], + InterpreterError("Forbidden access to function: __import__"), + ), + (["sys", "builtins"], ["__import__"], InterpreterError("Forbidden access to module: os")), + (["sys", "builtins", "os"], ["__import__"], None), + ], + ) + def test_vulnerability_builtins_via_sys(self, additional_authorized_imports, additional_tools, expected_error): + executor = 
LocalPythonExecutor(additional_authorized_imports) + if additional_tools: + from builtins import __import__ + + executor.send_tools({"__import__": __import__}) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor( + dedent( + """ + import sys + builtins = sys._getframe().f_builtins + builtins_import = builtins["__import__"] + os_module = builtins_import("os") + os_module.system(":") + """ + ) + ) + + @pytest.mark.parametrize("patch_builtin_import_module", [False, True]) # builtins_import.__module__ = None + @pytest.mark.parametrize( + "additional_authorized_imports, additional_tools, expected_error", + [ + ([], [], InterpreterError("Forbidden access to dunder attribute: __traceback__")), + ( + ["builtins", "os"], + ["__import__"], + InterpreterError("Forbidden access to dunder attribute: __traceback__"), + ), + ], + ) + def test_vulnerability_builtins_via_traceback( + self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch + ): + if patch_builtin_import_module: + monkeypatch.setattr("builtins.__import__.__module__", None) # inspect.getmodule(func) = None + executor = LocalPythonExecutor(additional_authorized_imports) + if additional_tools: + from builtins import __import__ + + executor.send_tools({"__import__": __import__}) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor( + dedent( + """ + try: + 1 / 0 + except Exception as e: + builtins = e.__traceback__.tb_frame.f_back.f_globals["__builtins__"] + builtins_import = builtins["__import__"] + os_module = builtins_import("os") + os_module.system(":") + """ + ) + ) + + @pytest.mark.parametrize("patch_builtin_import_module", [False, True]) # builtins_import.__module__ = None + @pytest.mark.parametrize( + "additional_authorized_imports, 
additional_tools, expected_error", + [ + ([], [], InterpreterError("Forbidden access to dunder attribute: __base__")), + (["warnings"], [], InterpreterError("Forbidden access to dunder attribute: __base__")), + ( + ["warnings", "builtins"], + [], + InterpreterError("Forbidden access to dunder attribute: __base__"), + ), + (["warnings", "builtins", "os"], [], InterpreterError("Forbidden access to dunder attribute: __base__")), + ( + ["warnings", "builtins", "os"], + ["__import__"], + InterpreterError("Forbidden access to dunder attribute: __base__"), + ), + ], + ) + def test_vulnerability_builtins_via_class_catch_warnings( + self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch + ): + if patch_builtin_import_module: + monkeypatch.setattr("builtins.__import__.__module__", None) # inspect.getmodule(func) = None + executor = LocalPythonExecutor(additional_authorized_imports) + if additional_tools: + from builtins import __import__ + + executor.send_tools({"__import__": __import__}) + if isinstance(expected_error, tuple): # different error depending on patch status + expected_error = expected_error[patch_builtin_import_module] + if isinstance(expected_error, Exception): + expectation = pytest.raises(type(expected_error), match=f".*{expected_error}") + elif expected_error is None: + expectation = does_not_raise() + with expectation: + executor( + dedent( + """ + classes = {}.__class__.__base__.__subclasses__() + for cls in classes: + if cls.__name__ == "catch_warnings": + break + builtins = cls()._module.__builtins__ + builtins_import = builtins["__import__"] + os_module = builtins_import('os') + os_module.system(":") + """ + ) + ) + + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + @pytest.mark.parametrize( + "additional_authorized_imports, expected_error", + [ + ([], InterpreterError("Forbidden access to dunder attribute: __base__")), + (["os"], InterpreterError("Forbidden access to dunder attribute: 
__base__")), + ], + ) + def test_vulnerability_load_module_via_builtin_importer(self, additional_authorized_imports, expected_error): + executor = LocalPythonExecutor(additional_authorized_imports) + with ( + pytest.raises(type(expected_error), match=f".*{expected_error}") + if isinstance(expected_error, Exception) + else does_not_raise() + ): + executor( + dedent( + """ + classes = {}.__class__.__base__.__subclasses__() + for cls in classes: + if cls.__name__ == "BuiltinImporter": + break + os_module = cls().load_module("os") + os_module.system(":") + """ + ) + ) + + def test_vulnerability_class_via_subclasses(self): + # Subclass: subprocess.Popen + executor = LocalPythonExecutor([]) + code = dedent( + """ + for cls in ().__class__.__base__.__subclasses__(): + if 'Popen' in cls.__class__.__repr__(cls): + break + cls(["sh", "-c", ":"]).wait() + """ + ) + with pytest.raises(InterpreterError, match="Forbidden access to dunder attribute: __base__"): + executor(code) + + code = dedent( + """ + [c for c in ().__class__.__base__.__subclasses__() if "Popen" in c.__class__.__repr__(c)][0]( + ["sh", "-c", ":"] + ).wait() + """ + ) + with pytest.raises(InterpreterError, match="Forbidden access to dunder attribute: __base__"): + executor(code) + + @pytest.mark.parametrize( + "code, dunder_attribute", + [("a = (); b = a.__class__", "__class__"), ("class A:\n attr=1\nx = A()\nx_dict = x.__dict__", "__dict__")], + ) + def test_vulnerability_via_dunder_access(self, code, dunder_attribute): + executor = LocalPythonExecutor([]) + with pytest.raises(InterpreterError, match=f"Forbidden access to dunder attribute: {dunder_attribute}"): + executor(code) + + def test_vulnerability_via_dunder_indirect_access(self): + executor = LocalPythonExecutor([]) + code = "a = (); b = getattr(a, '__class__')" + with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'getattr'"): + executor(code) diff --git a/tests/test_mcp_client.py b/tests/test_mcp_client.py new file mode 100644 
index 000000000..30b658a70 --- /dev/null +++ b/tests/test_mcp_client.py @@ -0,0 +1,60 @@ +from textwrap import dedent + +import pytest +from mcp import StdioServerParameters + +from smolagents.mcp_client import MCPClient + + +@pytest.fixture +def echo_server_script(): + return dedent( + ''' + from mcp.server.fastmcp import FastMCP + + mcp = FastMCP("Echo Server") + + @mcp.tool() + def echo_tool(text: str) -> str: + """Echo the input text""" + return f"Echo: {text}" + + mcp.run() + ''' + ) + + +def test_mcp_client_with_syntax(echo_server_script: str): + """Test the MCPClient with the context manager syntax.""" + server_parameters = StdioServerParameters(command="python", args=["-c", echo_server_script]) + with MCPClient(server_parameters) as tools: + assert len(tools) == 1 + assert tools[0].name == "echo_tool" + assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!" + + +def test_mcp_client_try_finally_syntax(echo_server_script: str): + """Test the MCPClient with the try ... finally syntax.""" + server_parameters = StdioServerParameters(command="python", args=["-c", echo_server_script]) + mcp_client = MCPClient(server_parameters) + try: + tools = mcp_client.get_tools() + assert len(tools) == 1 + assert tools[0].name == "echo_tool" + assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!" + finally: + mcp_client.disconnect() + + +def test_multiple_servers(echo_server_script: str): + """Test the MCPClient with multiple servers.""" + server_parameters = [ + StdioServerParameters(command="python", args=["-c", echo_server_script]), + StdioServerParameters(command="python", args=["-c", echo_server_script]), + ] + with MCPClient(server_parameters) as tools: + assert len(tools) == 2 + assert tools[0].name == "echo_tool" + assert tools[1].name == "echo_tool" + assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!" + assert tools[1].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!" 
diff --git a/tests/test_memory.py b/tests/test_memory.py index c007a185c..04c6b7f47 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -70,7 +70,7 @@ def test_action_step_to_messages(): assert "type" in content assert "text" in content message = messages[1] - assert message["role"] == MessageRole.ASSISTANT + assert message["role"] == MessageRole.TOOL_CALL assert len(message["content"]) == 1 text_content = message["content"][0] @@ -78,23 +78,43 @@ def test_action_step_to_messages(): assert "type" in text_content assert "text" in text_content - observation_message = messages[2] - assert observation_message["role"] == MessageRole.TOOL_RESPONSE - assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"] - - image_message = messages[3] - image_content = image_message["content"][1] + image_message = messages[2] + image_content = image_message["content"][0] assert isinstance(image_content, dict) assert "type" in image_content assert "image" in image_content + observation_message = messages[3] + assert observation_message["role"] == MessageRole.TOOL_RESPONSE + assert "Observation:\nThis is a nice observation" in observation_message["content"][0]["text"] + + +def test_action_step_to_messages_no_tool_calls_with_observations(): + action_step = ActionStep( + model_input_messages=None, + tool_calls=None, + start_time=None, + end_time=None, + step_number=None, + error=None, + duration=None, + model_output_message=None, + model_output=None, + observations="This is an observation.", + observations_images=None, + action_output=None, + ) + messages = action_step.to_messages() + assert len(messages) == 1 + observation_message = messages[0] + assert observation_message["role"] == MessageRole.TOOL_RESPONSE + assert "Observation:\nThis is an observation." 
in observation_message["content"][0]["text"] + def test_planning_step_to_messages(): planning_step = PlanningStep( model_input_messages=[Message(role=MessageRole.USER, content="Hello")], - model_output_message_facts=ChatMessage(role=MessageRole.ASSISTANT, content="Facts"), - facts="These are facts.", - model_output_message_plan=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"), + model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"), plan="This is a plan.", ) messages = planning_step.to_messages(summary_mode=False) @@ -103,14 +123,14 @@ def test_planning_step_to_messages(): assert isinstance(message, dict) assert "role" in message assert "content" in message - assert isinstance(message["role"], MessageRole) - assert message["role"] == MessageRole.ASSISTANT assert isinstance(message["content"], list) assert len(message["content"]) == 1 for content in message["content"]: assert isinstance(content, dict) assert "type" in content assert "text" in content + assert messages[0]["role"] == MessageRole.ASSISTANT + assert messages[1]["role"] == MessageRole.USER def test_task_step_to_messages(): diff --git a/tests/test_models.py b/tests/test_models.py index f663972a7..fa81ae82a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,35 +15,67 @@ import json import sys import unittest -from pathlib import Path -from typing import Optional +from contextlib import ExitStack from unittest.mock import MagicMock, patch import pytest -from transformers.testing_utils import get_tests_dir +from huggingface_hub import ChatCompletionOutputMessage from smolagents.models import ( + AmazonBedrockServerModel, + AzureOpenAIServerModel, ChatMessage, + ChatMessageToolCall, HfApiModel, + InferenceClientModel, LiteLLMModel, + LiteLLMRouterModel, MessageRole, MLXModel, + Model, OpenAIServerModel, TransformersModel, get_clean_message_list, + get_tool_call_from_text, get_tool_json_schema, parse_json_if_needed, - parse_tool_args_if_needed, + 
supports_stop_parameter, ) from smolagents.tools import tool from .utils.markers import require_run_all -class ModelTests(unittest.TestCase): +class TestModel: + @pytest.mark.parametrize( + "model_id, stop_sequences, should_contain_stop", + [ + ("regular-model", ["stop1", "stop2"], True), # Regular model should include stop + ("openai/o3", ["stop1", "stop2"], False), # o3 model should not include stop + ("openai/o4-mini", ["stop1", "stop2"], False), # o4-mini model should not include stop + ("something/else/o3", ["stop1", "stop2"], False), # Path ending with o3 should not include stop + ("something/else/o4-mini", ["stop1", "stop2"], False), # Path ending with o4-mini should not include stop + ("o3", ["stop1", "stop2"], False), # Exact o3 model should not include stop + ("o4-mini", ["stop1", "stop2"], False), # Exact o4-mini model should not include stop + ("regular-model", None, False), # None stop_sequences should not add stop parameter + ], + ) + def test_prepare_completion_kwargs_stop_sequences(self, model_id, stop_sequences, should_contain_stop): + model = Model() + model.model_id = model_id + completion_kwargs = model._prepare_completion_kwargs( + messages=[{"role": "user", "content": [{"type": "text", "text": "Hello"}]}], stop_sequences=stop_sequences + ) + # Verify that the stop parameter is only included when appropriate + if should_contain_stop: + assert "stop" in completion_kwargs + assert completion_kwargs["stop"] == stop_sequences + else: + assert "stop" not in completion_kwargs + def test_get_json_schema_has_nullable_args(self): @tool - def get_weather(location: str, celsius: Optional[bool] = False) -> str: + def get_weather(location: str, celsius: bool | None = False) -> str: """ Get weather in the next days at given location. Secretly this tool does not care about the location, it hates the weather everywhere. 
@@ -81,7 +113,8 @@ def test_get_mlx_message_tricky_stop_sequence(self): # check stop_sequence capture when output has trailing chars assert model(messages, stop_sequences=[stop_sequence]).content == "I'm ready to help you" - def test_transformers_message_no_tool(self): + def test_transformers_message_no_tool(self, monkeypatch): + monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30) # instead of 10 model = TransformersModel( model_id="HuggingFaceTB/SmolLM2-135M-Instruct", max_new_tokens=5, @@ -89,27 +122,35 @@ def test_transformers_message_no_tool(self): do_sample=False, ) messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] - output = model(messages, stop_sequences=["great"]).content + output = model.generate(messages, stop_sequences=["great"]).content assert output == "assistant\nHello" - def test_transformers_message_vl_no_tool(self): - from PIL import Image + output = model.generate_stream(messages, stop_sequences=["great"]) + output_str = "" + for el in output: + output_str += el.content + assert output_str == "assistant\nHello" + + def test_transformers_message_vl_no_tool(self, shared_datadir, monkeypatch): + monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30) # instead of 10 + import PIL.Image - img = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png") + img = PIL.Image.open(shared_datadir / "000000039769.png") model = TransformersModel( model_id="llava-hf/llava-interleave-qwen-0.5b-hf", - max_new_tokens=5, + max_new_tokens=4, device_map="cpu", do_sample=False, ) messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}, {"type": "image", "image": img}]}] - output = model(messages, stop_sequences=["great"]).content - assert output == "Hello! 
How can" + output = model.generate(messages, stop_sequences=["great"]).content + assert output == "I am" - def test_parse_tool_args_if_needed(self): - original_message = ChatMessage(role="user", content=[{"type": "text", "text": "Hello!"}]) - parsed_message = parse_tool_args_if_needed(original_message) - assert parsed_message == original_message + output = model.generate_stream(messages, stop_sequences=["great"]) + output_str = "" + for el in output: + output_str += el.content + assert output_str == "I am" def test_parse_json_if_needed(self): args = "abc" @@ -129,11 +170,13 @@ def test_parse_json_if_needed(self): assert parsed_args == 3 -class TestHfApiModel: +class TestInferenceClientModel: def test_call_with_custom_role_conversions(self): custom_role_conversions = {MessageRole.USER: MessageRole.SYSTEM} - model = HfApiModel(model_id="test-model", custom_role_conversions=custom_role_conversions) + model = InferenceClientModel(model_id="test-model", custom_role_conversions=custom_role_conversions) model.client = MagicMock() + mock_response = model.client.chat_completion.return_value + mock_response.choices[0].message = ChatCompletionOutputMessage(role="assistant") messages = [{"role": "user", "content": "Test message"}] _ = model(messages) # Verify that the role conversion was applied @@ -141,24 +184,73 @@ def test_call_with_custom_role_conversions(self): "role conversion should be applied" ) + def test_init_model_with_tokens(self): + model = InferenceClientModel(model_id="test-model", token="abc") + assert model.client.token == "abc" + + model = InferenceClientModel(model_id="test-model", api_key="abc") + assert model.client.token == "abc" + + with pytest.raises(ValueError, match="Received both `token` and `api_key` arguments."): + InferenceClientModel(model_id="test-model", token="abc", api_key="def") + @require_run_all def test_get_hfapi_message_no_tool(self): - model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10) + model = 
InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10) messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] model(messages, stop_sequences=["great"]) @require_run_all def test_get_hfapi_message_no_tool_external_provider(self): - model = HfApiModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10) + model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10) messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] model(messages, stop_sequences=["great"]) +class TestHfApiModel: + def test_init_model_with_tokens(self): + model = HfApiModel(model_id="test-model", token="abc") + assert model.client.token == "abc" + + model = HfApiModel(model_id="test-model", api_key="abc") + assert model.client.token == "abc" + + with pytest.raises(ValueError) as e: + _ = HfApiModel(model_id="test-model", token="abc", api_key="def") + assert "Received both `token` and `api_key` arguments." 
in str(e) + + @require_run_all + def test_get_hfapi_message_no_tool(self): + model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10) + messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] + model.generate(messages, stop_sequences=["great"]) + + @require_run_all + def test_get_hfapi_message_no_tool_external_provider(self): + model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10) + messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] + model.generate(messages, stop_sequences=["great"]) + + @require_run_all + def test_get_hfapi_message_stream_no_tool(self): + model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10) + messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] + for el in model.generate_stream(messages, stop_sequences=["great"]): + assert el.content is not None + + @require_run_all + def test_get_hfapi_message_stream_no_tool_external_provider(self): + model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10) + messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}] + for el in model.generate_stream(messages, stop_sequences=["great"]): + assert el.content is not None + + class TestLiteLLMModel: @pytest.mark.parametrize( "model_id, error_flag", [ - ("groq/llama-3.3-70b", "Missing API Key"), + ("groq/llama-3.3-70b", "Invalid API Key"), ("cerebras/llama-3.3-70b", "The api_key client option must be set"), ("mistral/mistral-tiny", "The api_key client option must be set"), ], @@ -168,7 +260,12 @@ def test_call_different_providers_without_key(self, model_id, error_flag): messages = [{"role": "user", "content": [{"type": "text", "text": "Test message"}]}] with pytest.raises(Exception) as e: # This should raise 401 error because of missing API key, not fail for any "bad format" reason - model(messages) + model.generate(messages) + assert 
error_flag in str(e) + with pytest.raises(Exception) as e: + # This should raise 401 error because of missing API key, not fail for any "bad format" reason + for el in model.generate_stream(messages): + assert el.content is not None assert error_flag in str(e) def test_passing_flatten_messages(self): @@ -179,6 +276,41 @@ def test_passing_flatten_messages(self): assert model.flatten_messages_as_text +class TestLiteLLMRouterModel: + @pytest.mark.parametrize( + "model_id, expected", + [ + ("llama-3.3-70b", False), + ("llama-3.3-70b", True), + ("mistral-tiny", True), + ], + ) + def test_flatten_messages_as_text(self, model_id, expected): + model_list = [ + {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}}, + {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}}, + {"model_name": "mistral-tiny", "litellm_params": {"model": "mistral/mistral-tiny"}}, + ] + model = LiteLLMRouterModel(model_id=model_id, model_list=model_list, flatten_messages_as_text=expected) + assert model.flatten_messages_as_text is expected + + def test_create_client(self): + model_list = [ + {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}}, + {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}}, + ] + with patch("litellm.Router") as mock_router: + router_model = LiteLLMRouterModel( + model_id="model-group-1", model_list=model_list, client_kwargs={"routing_strategy": "simple-shuffle"} + ) + # Ensure that the Router constructor was called with the expected keyword arguments + mock_router.assert_called_once() + assert mock_router.call_count == 1 + assert mock_router.call_args.kwargs["model_list"] == model_list + assert mock_router.call_args.kwargs["routing_strategy"] == "simple-shuffle" + assert router_model.client == mock_router.return_value + + class TestOpenAIServerModel: def test_client_kwargs_passed_correctly(self): model_id = "gpt-3.5-turbo" @@ -189,7 +321,7 @@ def 
test_client_kwargs_passed_correctly(self): client_kwargs = {"max_retries": 5} with patch("openai.OpenAI") as MockOpenAI: - _ = OpenAIServerModel( + model = OpenAIServerModel( model_id=model_id, api_base=api_base, api_key=api_key, @@ -197,10 +329,103 @@ def test_client_kwargs_passed_correctly(self): project=project, client_kwargs=client_kwargs, ) - MockOpenAI.assert_called_once_with( - base_url=api_base, api_key=api_key, organization=organization, project=project, max_retries=5 + MockOpenAI.assert_called_once_with( + base_url=api_base, api_key=api_key, organization=organization, project=project, max_retries=5 + ) + assert model.client == MockOpenAI.return_value + + +class TestAmazonBedrockServerModel: + def test_client_for_bedrock(self): + model_id = "us.amazon.nova-pro-v1:0" + + with patch("boto3.client") as MockBoto3: + model = AmazonBedrockServerModel( + model_id=model_id, ) + assert model.client == MockBoto3.return_value + + +class TestAzureOpenAIServerModel: + def test_client_kwargs_passed_correctly(self): + model_id = "gpt-3.5-turbo" + api_key = "test_api_key" + api_version = "2023-12-01-preview" + azure_endpoint = "https://example-resource.azure.openai.com/" + organization = "test_org" + project = "test_project" + client_kwargs = {"max_retries": 5} + + with patch("openai.OpenAI") as MockOpenAI, patch("openai.AzureOpenAI") as MockAzureOpenAI: + model = AzureOpenAIServerModel( + model_id=model_id, + api_key=api_key, + api_version=api_version, + azure_endpoint=azure_endpoint, + organization=organization, + project=project, + client_kwargs=client_kwargs, + ) + assert MockOpenAI.call_count == 0 + MockAzureOpenAI.assert_called_once_with( + base_url=None, + api_key=api_key, + api_version=api_version, + azure_endpoint=azure_endpoint, + organization=organization, + project=project, + max_retries=5, + ) + assert model.client == MockAzureOpenAI.return_value + + +class TestTransformersModel: + @pytest.mark.parametrize( + "patching", + [ + [ + ( + 
"transformers.AutoModelForImageTextToText.from_pretrained", + {"side_effect": ValueError("Unrecognized configuration class")}, + ), + ("transformers.AutoModelForCausalLM.from_pretrained", {}), + ("transformers.AutoTokenizer.from_pretrained", {}), + ], + [ + ("transformers.AutoModelForImageTextToText.from_pretrained", {}), + ("transformers.AutoProcessor.from_pretrained", {}), + ], + ], + ) + def test_init(self, patching): + with ExitStack() as stack: + mocks = {target: stack.enter_context(patch(target, **kwargs)) for target, kwargs in patching} + model = TransformersModel( + model_id="test-model", device_map="cpu", torch_dtype="float16", trust_remote_code=True + ) + assert model.model_id == "test-model" + if "transformers.AutoTokenizer.from_pretrained" in mocks: + assert model.model == mocks["transformers.AutoModelForCausalLM.from_pretrained"].return_value + assert mocks["transformers.AutoModelForCausalLM.from_pretrained"].call_args.kwargs == { + "device_map": "cpu", + "torch_dtype": "float16", + "trust_remote_code": True, + } + assert model.tokenizer == mocks["transformers.AutoTokenizer.from_pretrained"].return_value + assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.args == ("test-model",) + assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.kwargs == {"trust_remote_code": True} + elif "transformers.AutoProcessor.from_pretrained" in mocks: + assert model.model == mocks["transformers.AutoModelForImageTextToText.from_pretrained"].return_value + assert mocks["transformers.AutoModelForImageTextToText.from_pretrained"].call_args.kwargs == { + "device_map": "cpu", + "torch_dtype": "float16", + "trust_remote_code": True, + } + assert model.processor == mocks["transformers.AutoProcessor.from_pretrained"].return_value + assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.args == ("test-model",) + assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.kwargs == {"trust_remote_code": True} + def 
test_get_clean_message_list_basic(): messages = [ @@ -277,4 +502,144 @@ def test_get_clean_message_list_flatten_messages_as_text(): result = get_clean_message_list(messages, flatten_messages_as_text=True) assert len(result) == 1 assert result[0]["role"] == "user" - assert result[0]["content"] == "Hello!How are you?" + assert result[0]["content"] == "Hello!\nHow are you?" + + +@pytest.mark.parametrize( + "model_class, model_kwargs, patching, expected_flatten_messages_as_text", + [ + (AzureOpenAIServerModel, {}, ("openai.AzureOpenAI", {}), False), + (InferenceClientModel, {}, ("huggingface_hub.InferenceClient", {}), False), + (LiteLLMModel, {}, None, False), + (LiteLLMModel, {"model_id": "ollama"}, None, True), + (LiteLLMModel, {"model_id": "groq"}, None, True), + (LiteLLMModel, {"model_id": "cerebras"}, None, True), + (MLXModel, {}, ("mlx_lm.load", {"return_value": (MagicMock(), MagicMock())}), True), + (OpenAIServerModel, {}, ("openai.OpenAI", {}), False), + (OpenAIServerModel, {"flatten_messages_as_text": True}, ("openai.OpenAI", {}), True), + ( + TransformersModel, + {}, + [ + ( + "transformers.AutoModelForImageTextToText.from_pretrained", + {"side_effect": ValueError("Unrecognized configuration class")}, + ), + ("transformers.AutoModelForCausalLM.from_pretrained", {}), + ("transformers.AutoTokenizer.from_pretrained", {}), + ], + True, + ), + ( + TransformersModel, + {}, + [ + ("transformers.AutoModelForImageTextToText.from_pretrained", {}), + ("transformers.AutoProcessor.from_pretrained", {}), + ], + False, + ), + ], +) +def test_flatten_messages_as_text_for_all_models( + model_class, model_kwargs, patching, expected_flatten_messages_as_text +): + with ExitStack() as stack: + if isinstance(patching, list): + for target, kwargs in patching: + stack.enter_context(patch(target, **kwargs)) + elif patching: + target, kwargs = patching + stack.enter_context(patch(target, **kwargs)) + + model = model_class(**{"model_id": "test-model", **model_kwargs}) + assert 
model.flatten_messages_as_text is expected_flatten_messages_as_text, f"{model_class.__name__} failed" + + +@pytest.mark.parametrize( + "model_id,expected", + [ + # Unsupported base models + ("o3", False), + ("o4-mini", False), + # Unsupported versioned models + ("o3-2025-04-16", False), + ("o4-mini-2025-04-16", False), + # Unsupported models with path prefixes + ("openai/o3", False), + ("openai/o4-mini", False), + ("openai/o3-2025-04-16", False), + ("openai/o4-mini-2025-04-16", False), + # Supported models + ("o3-mini", True), # Different from o3 + ("o3-mini-2025-01-31", True), # Different from o3 + ("o4", True), # Different from o4-mini + ("o4-turbo", True), # Different from o4-mini + ("gpt-4", True), + ("claude-3-5-sonnet", True), + ("mistral-large", True), + # Supported models with path prefixes + ("openai/gpt-4", True), + ("anthropic/claude-3-5-sonnet", True), + ("mistralai/mistral-large", True), + # Edge cases + ("", True), # Empty string doesn't match pattern + ("o3x", True), # Not exactly o3 + ("o3_mini", True), # Not o3-mini format + ("prefix-o3", True), # o3 not at start + ], +) +def test_supports_stop_parameter(model_id, expected): + """Test the supports_stop_parameter function with various model IDs""" + assert supports_stop_parameter(model_id) == expected, f"Failed for model_id: {model_id}" + + +class TestGetToolCallFromText: + @pytest.fixture(autouse=True) + def mock_uuid4(self): + with patch("uuid.uuid4", return_value="test-uuid"): + yield + + def test_get_tool_call_from_text_basic(self): + text = '{"name": "weather_tool", "arguments": "New York"}' + result = get_tool_call_from_text(text, "name", "arguments") + assert isinstance(result, ChatMessageToolCall) + assert result.id == "test-uuid" + assert result.type == "function" + assert result.function.name == "weather_tool" + assert result.function.arguments == "New York" + + def test_get_tool_call_from_text_name_key_missing(self): + text = '{"action": "weather_tool", "arguments": "New York"}' + with 
pytest.raises(ValueError) as exc_info: + get_tool_call_from_text(text, "name", "arguments") + error_msg = str(exc_info.value) + assert "Key tool_name_key='name' not found" in error_msg + assert "'action', 'arguments'" in error_msg + + def test_get_tool_call_from_text_json_object_args(self): + text = '{"name": "weather_tool", "arguments": {"city": "New York"}}' + result = get_tool_call_from_text(text, "name", "arguments") + assert result.function.arguments == {"city": "New York"} + + def test_get_tool_call_from_text_json_string_args(self): + text = '{"name": "weather_tool", "arguments": "{\\"city\\": \\"New York\\"}"}' + result = get_tool_call_from_text(text, "name", "arguments") + assert result.function.arguments == {"city": "New York"} + + def test_get_tool_call_from_text_missing_args(self): + text = '{"name": "weather_tool"}' + result = get_tool_call_from_text(text, "name", "arguments") + assert result.function.arguments is None + + def test_get_tool_call_from_text_custom_keys(self): + text = '{"tool": "weather_tool", "params": "New York"}' + result = get_tool_call_from_text(text, "tool", "params") + assert result.function.name == "weather_tool" + assert result.function.arguments == "New York" + + def test_get_tool_call_from_text_numeric_args(self): + text = '{"name": "calculator", "arguments": 42}' + result = get_tool_call_from_text(text, "name", "arguments") + assert result.function.name == "calculator" + assert result.function.arguments == 42 diff --git a/tests/test_monitoring.py b/tests/test_monitoring.py index 7483214b1..41bbc8b8e 100644 --- a/tests/test_monitoring.py +++ b/tests/test_monitoring.py @@ -15,8 +15,9 @@ import unittest +import pytest + from smolagents import ( - AgentError, AgentImage, CodeAgent, ToolCallingAgent, @@ -26,16 +27,16 @@ ChatMessage, ChatMessageToolCall, ChatMessageToolCallDefinition, + Model, ) -from smolagents.monitoring import AgentLogger, LogLevel -class FakeLLMModel: +class FakeLLMModel(Model): def __init__(self): 
self.last_input_token_count = 10 self.last_output_token_count = 20 - def __call__(self, prompt, tools_to_call_from=None, **kwargs): + def generate(self, prompt, tools_to_call_from=None, **kwargs): if tools_to_call_from is not None: return ChatMessage( role="assistant", @@ -84,12 +85,12 @@ def test_toolcalling_agent_metrics(self): self.assertEqual(agent.monitor.total_output_token_count, 20) def test_code_agent_metrics_max_steps(self): - class FakeLLMModelMalformedAnswer: + class FakeLLMModelMalformedAnswer(Model): def __init__(self): self.last_input_token_count = 10 self.last_output_token_count = 20 - def __call__(self, prompt, **kwargs): + def generate(self, prompt, **kwargs): return ChatMessage(role="assistant", content="Malformed answer") agent = CodeAgent( @@ -104,12 +105,12 @@ def __call__(self, prompt, **kwargs): self.assertEqual(agent.monitor.total_output_token_count, 40) def test_code_agent_metrics_generation_error(self): - class FakeLLMModelGenerationException: + class FakeLLMModelGenerationException(Model): def __init__(self): self.last_input_token_count = 10 self.last_output_token_count = 20 - def __call__(self, prompt, **kwargs): + def generate(self, prompt, **kwargs): self.last_input_token_count = 10 self.last_output_token_count = 0 raise Exception("Cannot generate") @@ -119,9 +120,10 @@ def __call__(self, prompt, **kwargs): model=FakeLLMModelGenerationException(), max_steps=1, ) - agent.run("Fake task") + with pytest.raises(Exception): + agent.run("Fake task") - self.assertEqual(agent.monitor.total_input_token_count, 20) # Should have done two monitoring callbacks + self.assertEqual(agent.monitor.total_input_token_count, 10) # Should have done one monitoring callbacks self.assertEqual(agent.monitor.total_output_token_count, 0) def test_streaming_agent_text_output(self): @@ -129,12 +131,16 @@ def test_streaming_agent_text_output(self): tools=[], model=FakeLLMModel(), max_steps=1, + planning_interval=2, ) # Use stream_to_gradio to capture the output 
outputs = list(stream_to_gradio(agent, task="Test task")) - self.assertEqual(len(outputs), 7) + self.assertEqual(len(outputs), 11) + plan_message = outputs[1] + self.assertEqual(plan_message.role, "assistant") + self.assertIn("Code:", plan_message.content) final_message = outputs[-1] self.assertEqual(final_message.role, "assistant") self.assertIn("This is the final answer.", final_message.content) @@ -155,7 +161,7 @@ def test_streaming_agent_image_output(self): ) ) - self.assertEqual(len(outputs), 5) + self.assertEqual(len(outputs), 6) final_message = outputs[-1] self.assertEqual(final_message.role, "assistant") self.assertIsInstance(final_message.content, dict) @@ -163,21 +169,20 @@ def test_streaming_agent_image_output(self): self.assertEqual(final_message.content["mime_type"], "image/png") def test_streaming_with_agent_error(self): - logger = AgentLogger(level=LogLevel.INFO) - - def dummy_model(prompt, **kwargs): - raise AgentError("Simulated agent error", logger) + class DummyModel(Model): + def generate(self, prompt, **kwargs): + return ChatMessage(role="assistant", content="Malformed call") agent = CodeAgent( tools=[], - model=dummy_model, + model=DummyModel(), max_steps=1, ) # Use stream_to_gradio to capture the output outputs = list(stream_to_gradio(agent, task="Test task")) - self.assertEqual(len(outputs), 9) + self.assertEqual(len(outputs), 13) final_message = outputs[-1] self.assertEqual(final_message.role, "assistant") - self.assertIn("Simulated agent error", final_message.content) + self.assertIn("Malformed call", final_message.content) diff --git a/tests/test_remote_executors.py b/tests/test_remote_executors.py new file mode 100644 index 000000000..f7fe05ed2 --- /dev/null +++ b/tests/test_remote_executors.py @@ -0,0 +1,105 @@ +import io +from textwrap import dedent +from unittest.mock import MagicMock, patch + +import docker +import PIL.Image +import pytest +from rich.console import Console + +from smolagents.monitoring import AgentLogger, LogLevel 
+from smolagents.remote_executors import DockerExecutor, E2BExecutor +from smolagents.utils import AgentError + +from .utils.markers import require_run_all + + +class TestE2BExecutorMock: + def test_e2b_executor_instantiation(self): + logger = MagicMock() + with patch("e2b_code_interpreter.Sandbox") as mock_sandbox: + mock_sandbox.return_value.commands.run.return_value.error = None + mock_sandbox.return_value.run_code.return_value.error = None + executor = E2BExecutor( + additional_imports=[], logger=logger, api_key="dummy-api-key", template="dummy-template-id", timeout=60 + ) + assert isinstance(executor, E2BExecutor) + assert executor.logger == logger + assert executor.final_answer_pattern.pattern == r"^final_answer\((.*)\)$" + assert executor.sandbox == mock_sandbox.return_value + assert mock_sandbox.call_count == 1 + assert mock_sandbox.call_args.kwargs == { + "api_key": "dummy-api-key", + "template": "dummy-template-id", + "timeout": 60, + } + + +@pytest.fixture +def docker_executor(): + executor = DockerExecutor( + additional_imports=["pillow", "numpy"], + logger=AgentLogger(LogLevel.INFO, Console(force_terminal=False, file=io.StringIO())), + ) + yield executor + executor.delete() + + +@require_run_all +class TestDockerExecutor: + @pytest.fixture(autouse=True) + def set_executor(self, docker_executor): + self.executor = docker_executor + + def test_initialization(self): + """Check if DockerExecutor initializes without errors""" + assert self.executor.container is not None, "Container should be initialized" + + def test_state_persistence(self): + """Test that variables and imports form one snippet persist in the next""" + code_action = "import numpy as np; a = 2" + self.executor(code_action) + + code_action = "print(np.sqrt(a))" + result, logs, final_answer = self.executor(code_action) + assert "1.41421" in logs + + def test_execute_output(self): + """Test execution that returns a string""" + code_action = 'final_answer("This is the final answer")' + result, 
logs, final_answer = self.executor(code_action) + assert result == "This is the final answer", "Result should be 'This is the final answer'" + + def test_execute_multiline_output(self): + """Test execution that returns a string""" + code_action = 'result = "This is the final answer"\nfinal_answer(result)' + result, logs, final_answer = self.executor(code_action) + assert result == "This is the final answer", "Result should be 'This is the final answer'" + + def test_execute_image_output(self): + """Test execution that returns a base64 image""" + code_action = dedent(""" + import base64 + from PIL import Image + from io import BytesIO + image = Image.new("RGB", (10, 10), (255, 0, 0)) + final_answer(image) + """) + result, logs, final_answer = self.executor(code_action) + assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image" + + def test_syntax_error_handling(self): + """Test handling of syntax errors""" + code_action = 'print("Missing Parenthesis' # Syntax error + with pytest.raises(AgentError) as exception_info: + self.executor(code_action) + assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error" + + def test_cleanup_on_deletion(self): + """Test if Docker container stops and removes on deletion""" + container_id = self.executor.container.id + self.executor.delete() # Trigger cleanup + + client = docker.from_env() + containers = [c.id for c in client.containers.list(all=True)] + assert container_id not in containers, "Container should be removed" diff --git a/tests/test_search.py b/tests/test_search.py index c146c6a67..7ed66636c 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import unittest from smolagents import DuckDuckGoSearchTool @@ -21,8 +20,8 @@ from .utils.markers import require_run_all -class DuckDuckGoSearchToolTester(unittest.TestCase, ToolTesterMixin): - def setUp(self): +class TestDuckDuckGoSearchTool(ToolTesterMixin): + def setup_method(self): self.tool = DuckDuckGoSearchTool() self.tool.setup() diff --git a/tests/test_tool_validation.py b/tests/test_tool_validation.py index f3a94ded2..a1ce170e7 100644 --- a/tests/test_tool_validation.py +++ b/tests/test_tool_validation.py @@ -1,8 +1,11 @@ +import ast +from textwrap import dedent + import pytest from smolagents.default_tools import DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool -from smolagents.tool_validation import validate_tool_attributes -from smolagents.tools import Tool +from smolagents.tool_validation import MethodChecker, validate_tool_attributes +from smolagents.tools import Tool, tool UNDEFINED_VARIABLE = "undefined_variable" @@ -29,8 +32,32 @@ def forward(self, input: str) -> str: return input.upper() -def test_validate_tool_attributes_valid(): - assert validate_tool_attributes(ValidTool) is None +@tool +def valid_tool_function(input: str) -> str: + """A valid tool function. + + Args: + input (str): Input string. 
+ """ + return input.upper() + + +@pytest.mark.parametrize("tool_class", [ValidTool, valid_tool_function.__class__]) +def test_validate_tool_attributes_valid(tool_class): + assert validate_tool_attributes(tool_class) is None + + +class InvalidToolName(Tool): + name = "invalid tool name" + description = "Tool with invalid name" + inputs = {"input": {"type": "string", "description": "input"}} + output_type = "string" + + def __init__(self): + super().__init__() + + def forward(self, input: str) -> str: + return input class InvalidToolComplexAttrs(Tool): @@ -88,6 +115,10 @@ def forward(self, input: str) -> str: @pytest.mark.parametrize( "tool_class, expected_error", [ + ( + InvalidToolName, + "Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found 'invalid tool name'", + ), (InvalidToolComplexAttrs, "Complex attributes should be defined in __init__, not as class attributes"), (InvalidToolRequiredParams, "Parameters in __init__ must have default values, found required parameters"), ( @@ -100,3 +131,51 @@ def forward(self, input: str) -> str: def test_validate_tool_attributes_exceptions(tool_class, expected_error): with pytest.raises(ValueError, match=expected_error): validate_tool_attributes(tool_class) + + +class MultipleAssignmentsTool(Tool): + name = "multiple_assignments_tool" + description = "Tool with multiple assignments" + inputs = {"input": {"type": "string", "description": "input"}} + output_type = "string" + + def __init__(self): + super().__init__() + + def forward(self, input: str) -> str: + a, b = "1", "2" + return a + b + + +def test_validate_tool_attributes_multiple_assignments(): + validate_tool_attributes(MultipleAssignmentsTool) + + +@tool +def tool_function_with_multiple_assignments(input: str) -> str: + """A valid tool function. + + Args: + input (str): Input string. 
+ """ + a, b = "1", "2" + return input.upper() + a + b + + +@pytest.mark.parametrize("tool_instance", [MultipleAssignmentsTool(), tool_function_with_multiple_assignments]) +def test_tool_to_dict_validation_with_multiple_assignments(tool_instance): + tool_instance.to_dict() + + +class TestMethodChecker: + def test_multiple_assignments(self): + source_code = dedent( + """ + def forward(self) -> str: + a, b = "1", "2" + return a + b + """ + ) + method_checker = MethodChecker(set()) + method_checker.visit(ast.parse(source_code)) + assert method_checker.errors == [] diff --git a/tests/test_tools.py b/tests/test_tools.py index 4ac48e07d..f82c08753 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -12,93 +12,76 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import inspect import os -import tempfile -import unittest -from pathlib import Path from textwrap import dedent -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Literal from unittest.mock import MagicMock, patch import mcp import numpy as np +import PIL.Image import pytest -import torch -from transformers import is_torch_available, is_vision_available -from transformers.testing_utils import get_tests_dir -from smolagents.agent_types import _AGENT_TYPE_MAPPING, AgentAudio, AgentImage, AgentText -from smolagents.tools import AUTHORIZED_TYPES, Tool, ToolCollection, tool +from smolagents.agent_types import _AGENT_TYPE_MAPPING +from smolagents.tools import AUTHORIZED_TYPES, Tool, ToolCollection, launch_gradio_demo, tool - -if is_torch_available(): - import torch - -if is_vision_available(): - from PIL import Image - - -def create_inputs(tool_inputs: Dict[str, Dict[Union[str, type], str]]): - inputs = {} - - for input_name, input_desc in tool_inputs.items(): - input_type = input_desc["type"] - - if input_type == "string": - inputs[input_name] = 
"Text input" - elif input_type == "image": - inputs[input_name] = Image.open(Path(get_tests_dir("fixtures")) / "000000039769.png").resize((512, 512)) - elif input_type == "audio": - inputs[input_name] = np.ones(3000) - else: - raise ValueError(f"Invalid type requested: {input_type}") - - return inputs - - -def output_type(output): - if isinstance(output, (str, AgentText)): - return "string" - elif isinstance(output, (Image.Image, AgentImage)): - return "image" - elif isinstance(output, (torch.Tensor, AgentAudio)): - return "audio" - else: - raise TypeError(f"Invalid output: {output}") +from .utils.markers import require_run_all class ToolTesterMixin: def test_inputs_output(self): - self.assertTrue(hasattr(self.tool, "inputs")) - self.assertTrue(hasattr(self.tool, "output_type")) + assert hasattr(self.tool, "inputs") + assert hasattr(self.tool, "output_type") inputs = self.tool.inputs - self.assertTrue(isinstance(inputs, dict)) + assert isinstance(inputs, dict) for _, input_spec in inputs.items(): - self.assertTrue("type" in input_spec) - self.assertTrue("description" in input_spec) - self.assertTrue(input_spec["type"] in AUTHORIZED_TYPES) - self.assertTrue(isinstance(input_spec["description"], str)) + assert "type" in input_spec + assert "description" in input_spec + assert input_spec["type"] in AUTHORIZED_TYPES + assert isinstance(input_spec["description"], str) output_type = self.tool.output_type - self.assertTrue(output_type in AUTHORIZED_TYPES) + assert output_type in AUTHORIZED_TYPES def test_common_attributes(self): - self.assertTrue(hasattr(self.tool, "description")) - self.assertTrue(hasattr(self.tool, "name")) - self.assertTrue(hasattr(self.tool, "inputs")) - self.assertTrue(hasattr(self.tool, "output_type")) + assert hasattr(self.tool, "description") + assert hasattr(self.tool, "name") + assert hasattr(self.tool, "inputs") + assert hasattr(self.tool, "output_type") - def test_agent_type_output(self): + def test_agent_type_output(self, create_inputs): 
inputs = create_inputs(self.tool.inputs) output = self.tool(**inputs, sanitize_inputs_outputs=True) if self.tool.output_type != "any": agent_type = _AGENT_TYPE_MAPPING[self.tool.output_type] - self.assertTrue(isinstance(output, agent_type)) + assert isinstance(output, agent_type) + + @pytest.fixture + def create_inputs(self, shared_datadir): + def _create_inputs(tool_inputs: dict[str, dict[str | type, str]]) -> dict[str, Any]: + inputs = {} + + for input_name, input_desc in tool_inputs.items(): + input_type = input_desc["type"] + if input_type == "string": + inputs[input_name] = "Text input" + elif input_type == "image": + inputs[input_name] = PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512)) + elif input_type == "audio": + inputs[input_name] = np.ones(3000) + else: + raise ValueError(f"Invalid type requested: {input_type}") -class ToolTests(unittest.TestCase): + return inputs + + return _create_inputs + + +class TestTool: def test_tool_init_with_decorator(self): @tool def coolfunc(a: str, b: int) -> float: @@ -163,7 +146,7 @@ def coolfunc(a: str, b: int) -> int: assert coolfunc.output_type == "number" assert "docstring has no description for the argument" in str(e) - def test_saving_tool_raises_error_imports_outside_function(self): + def test_saving_tool_raises_error_imports_outside_function(self, tmp_path): with pytest.raises(Exception) as e: import numpy as np @@ -174,7 +157,7 @@ def get_current_time() -> str: """ return str(np.random.random()) - get_current_time.save("output") + get_current_time.save(tmp_path) assert "np" in str(e) @@ -191,7 +174,7 @@ def forward(self): return str(np.random.random()) get_current_time = GetCurrentTimeTool() - get_current_time.save("output") + get_current_time.save(tmp_path) assert "np" in str(e) @@ -243,7 +226,7 @@ class PassTool(Tool): inputs = {"string_input": {"type": "string", "description": "input description"}} output_type = "string" - def __init__(self, url: Optional[str] = "none"): + def 
__init__(self, url: str | None = "none"): super().__init__(self) self.url = url @@ -253,7 +236,7 @@ def forward(self, string_input: str) -> str: fail_tool = PassTool() fail_tool.to_dict() - def test_saving_tool_allows_no_imports_from_outside_methods(self): + def test_saving_tool_allows_no_imports_from_outside_methods(self, tmp_path): # Test that using imports from outside functions fails import numpy as np @@ -272,7 +255,7 @@ def forward(self, string_input): fail_tool = FailTool() with pytest.raises(Exception) as e: - fail_tool.save("output") + fail_tool.save(tmp_path) assert "'np' is undefined" in str(e) # Test that putting these imports inside functions works @@ -292,7 +275,7 @@ def forward(self, string_input): return self.useless_method() + string_input success_tool = SuccessTool() - success_tool.save("output") + success_tool.save(tmp_path) def test_tool_missing_class_attributes_raises_error(self): with pytest.raises(Exception) as e: @@ -308,7 +291,7 @@ class GetWeatherTool(Tool): }, } - def forward(self, location: str, celsius: Optional[bool] = False) -> str: + def forward(self, location: str, celsius: bool | None = False) -> str: return "The weather is UNGODLY with torrential rains and temperatures below -10ยฐC" GetWeatherTool() @@ -316,7 +299,7 @@ def forward(self, location: str, celsius: Optional[bool] = False) -> str: def test_tool_from_decorator_optional_args(self): @tool - def get_weather(location: str, celsius: Optional[bool] = False) -> str: + def get_weather(location: str, celsius: bool | None = False) -> str: """ Get weather in the next days at given location. Secretly this tool does not care about the location, it hates the weather everywhere. 
@@ -346,7 +329,7 @@ class GetWeatherTool(Tool): } output_type = "string" - def forward(self, location: str, celsius: Optional[bool] = False) -> str: + def forward(self, location: str, celsius: bool | None = False) -> str: return "The weather is UNGODLY with torrential rains and temperatures below -10ยฐC" GetWeatherTool() @@ -407,7 +390,7 @@ def get_weather(location: str, celsius: bool = False) -> str: assert get_weather.inputs["celsius"]["nullable"] - def test_tool_supports_any_none(self): + def test_tool_supports_any_none(self, tmp_path): @tool def get_weather(location: Any) -> None: """ @@ -418,14 +401,13 @@ def get_weather(location: Any) -> None: """ return - with tempfile.TemporaryDirectory() as tmp_dir: - get_weather.save(tmp_dir) + get_weather.save(tmp_path) assert get_weather.inputs["location"]["type"] == "any" assert get_weather.output_type == "null" def test_tool_supports_array(self): @tool - def get_weather(locations: List[str], months: Optional[Tuple[str, str]] = None) -> Dict[str, float]: + def get_weather(locations: list[str], months: tuple[str, str] | None = None) -> dict[str, float]: """ Get weather in the next days at given locations. @@ -438,7 +420,50 @@ def get_weather(locations: List[str], months: Optional[Tuple[str, str]] = None) assert get_weather.inputs["locations"]["type"] == "array" assert get_weather.inputs["months"]["type"] == "array" - def test_saving_tool_produces_valid_pyhon_code_with_multiline_description(self): + def test_tool_supports_string_literal(self): + @tool + def get_weather(unit: Literal["celsius", "fahrenheit"] = "celsius") -> None: + """ + Get weather in the next days at given location. 
+ + Args: + unit: The unit of temperature + """ + return + + assert get_weather.inputs["unit"]["type"] == "string" + assert get_weather.inputs["unit"]["enum"] == ["celsius", "fahrenheit"] + + def test_tool_supports_numeric_literal(self): + @tool + def get_choice(choice: Literal[1, 2, 3]) -> None: + """ + Get choice based on the provided numeric literal. + + Args: + choice: The numeric choice to be made. + """ + return + + assert get_choice.inputs["choice"]["type"] == "integer" + assert get_choice.inputs["choice"]["enum"] == [1, 2, 3] + + def test_tool_supports_nullable_literal(self): + @tool + def get_choice(choice: Literal[1, 2, 3, None]) -> None: + """ + Get choice based on the provided value. + + Args: + choice: The numeric choice to be made. + """ + return + + assert get_choice.inputs["choice"]["type"] == "integer" + assert get_choice.inputs["choice"]["nullable"] is True + assert get_choice.inputs["choice"]["enum"] == [1, 2, 3] + + def test_saving_tool_produces_valid_pyhon_code_with_multiline_description(self, tmp_path): @tool def get_weather(location: Any) -> None: """ @@ -450,33 +475,81 @@ def get_weather(location: Any) -> None: """ return - with tempfile.TemporaryDirectory() as tmp_dir: - get_weather.save(tmp_dir) - with open(os.path.join(tmp_dir, "tool.py"), "r", encoding="utf-8") as f: - source_code = f.read() - compile(source_code, f.name, "exec") + get_weather.save(tmp_path) + with open(os.path.join(tmp_path, "tool.py"), "r", encoding="utf-8") as f: + source_code = f.read() + compile(source_code, f.name, "exec") + + @pytest.mark.parametrize("fixture_name", ["boolean_default_tool_class", "boolean_default_tool_function"]) + def test_to_dict_boolean_default_input(self, fixture_name, request): + """Test that boolean input parameter with default value is correctly represented in to_dict output""" + tool = request.getfixturevalue(fixture_name) + result = tool.to_dict() + # Check that the boolean default annotation is preserved + assert "flag: bool = False" in 
result["code"] + # Check nullable attribute is set for the parameter with default value + assert "'nullable': True" in result["code"] + + @pytest.mark.parametrize("fixture_name", ["optional_input_tool_class", "optional_input_tool_function"]) + def test_to_dict_optional_input(self, fixture_name, request): + """Test that Optional/nullable input parameter is correctly represented in to_dict output""" + tool = request.getfixturevalue(fixture_name) + result = tool.to_dict() + # Check the Optional type annotation is preserved + assert "optional_text: str | None = None" in result["code"] + # Check that the input is marked as nullable in the code + assert "'nullable': True" in result["code"] + + def test_from_dict_roundtrip(self, example_tool): + # Convert to dict + tool_dict = example_tool.to_dict() + # Create from dict + recreated_tool = Tool.from_dict(tool_dict) + # Verify properties + assert recreated_tool.name == example_tool.name + assert recreated_tool.description == example_tool.description + assert recreated_tool.inputs == example_tool.inputs + assert recreated_tool.output_type == example_tool.output_type + # Verify functionality + test_input = "Hello, world!" + assert recreated_tool(test_input) == test_input.upper() + + def test_tool_from_dict_invalid(self): + # Missing code key + with pytest.raises(ValueError) as e: + Tool.from_dict({"name": "invalid_tool"}) + assert "must contain 'code' key" in str(e) + + def test_tool_decorator_preserves_original_function(self): + # Define a test function with type hints and docstring + def test_function(items: list[str]) -> str: + """Join a list of strings. 
+ Args: + items: A list of strings to join + Returns: + The joined string + """ + return ", ".join(items) - def test_saving_tool_produces_valid_python_code_with_complex_name(self): - # Test one cannot save tool with additional args in init - class FailTool(Tool): - name = 'spe"\rcific' - description = """test \n\r - description""" - inputs = {"string_input": {"type": "string", "description": "input description"}} - output_type = "string" + # Store original function signature, name, and source + original_signature = inspect.signature(test_function) + original_name = test_function.__name__ + original_docstring = test_function.__doc__ - def __init__(self): - super().__init__(self) + # Create a tool from the function + test_tool = tool(test_function) - def forward(self, string_input): - return "foo" + # Check that the original function is unchanged + assert original_signature == inspect.signature(test_function) + assert original_name == test_function.__name__ + assert original_docstring == test_function.__doc__ - fail_tool = FailTool() - with tempfile.TemporaryDirectory() as tmp_dir: - fail_tool.save(tmp_dir) - with open(os.path.join(tmp_dir, "tool.py"), "r", encoding="utf-8") as f: - source_code = f.read() - compile(source_code, f.name, "exec") + # Verify that the tool's forward method has a different signature (it has 'self') + tool_forward_sig = inspect.signature(test_tool.forward) + assert list(tool_forward_sig.parameters.keys())[0] == "self" + + # Original function should not have 'self' parameter + assert "self" not in original_signature.parameters @pytest.fixture @@ -500,12 +573,13 @@ def mock_smolagents_adapter(): class TestToolCollection: def test_from_mcp(self, mock_server_parameters, mock_mcp_adapt, mock_smolagents_adapter): - with ToolCollection.from_mcp(mock_server_parameters) as tool_collection: + with ToolCollection.from_mcp(mock_server_parameters, trust_remote_code=True) as tool_collection: assert isinstance(tool_collection, ToolCollection) assert 
len(tool_collection.tools) == 2 assert "tool1" in tool_collection.tools assert "tool2" in tool_collection.tools + @require_run_all def test_integration_from_mcp(self): # define the most simple mcp server with one tool that echoes the input text mcp_server_script = dedent("""\ @@ -525,7 +599,52 @@ def echo_tool(text: str) -> str: args=["-c", mcp_server_script], ) - with ToolCollection.from_mcp(mcp_server_params) as tool_collection: + with ToolCollection.from_mcp(mcp_server_params, trust_remote_code=True) as tool_collection: assert len(tool_collection.tools) == 1, "Expected 1 tool" assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'" assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text" + + def test_integration_from_mcp_with_sse(self): + import subprocess + import time + + # define the most simple mcp server with one tool that echoes the input text + mcp_server_script = dedent("""\ + from mcp.server.fastmcp import FastMCP + + mcp = FastMCP("Echo Server", host="127.0.0.1", port=8000) + + @mcp.tool() + def echo_tool(text: str) -> str: + return text + + mcp.run("sse") + """).strip() + + # start the SSE mcp server in a subprocess + server_process = subprocess.Popen( + ["python", "-c", mcp_server_script], + ) + + # wait for the server to start + time.sleep(1) + + try: + with ToolCollection.from_mcp( + {"url": "http://127.0.0.1:8000/sse"}, trust_remote_code=True + ) as tool_collection: + assert len(tool_collection.tools) == 1, "Expected 1 tool" + assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'" + assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text" + finally: + # clean up the process when test is done + server_process.kill() + server_process.wait() + + +@pytest.mark.parametrize("tool_fixture_name", ["boolean_default_tool_class"]) +def test_launch_gradio_demo_does_not_raise(tool_fixture_name, request): + 
tool = request.getfixturevalue(tool_fixture_name) + with patch("gradio.Interface.launch") as mock_launch: + launch_gradio_demo(tool) + assert mock_launch.call_count == 1 diff --git a/tests/test_types.py b/tests/test_types.py index 73465d0ed..e3050c9dd 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -16,17 +16,16 @@ import tempfile import unittest import uuid -from pathlib import Path -from PIL import Image +import PIL.Image from transformers.testing_utils import ( require_soundfile, - require_torch, - require_vision, ) from smolagents.agent_types import AgentAudio, AgentImage, AgentText +from .utils.markers import require_torch + def get_new_path(suffix="") -> str: directory = tempfile.mkdtemp() @@ -70,9 +69,8 @@ def test_from_string(self): self.assertEqual(agent_type.to_string(), path) -@require_vision @require_torch -class AgentImageTests(unittest.TestCase): +class TestAgentImage: def test_from_tensor(self): import torch @@ -81,37 +79,37 @@ def test_from_tensor(self): path = str(agent_type.to_string()) # Ensure that the tensor and the agent_type's tensor are the same - self.assertTrue(torch.allclose(tensor, agent_type._tensor, atol=1e-4)) + assert torch.allclose(tensor, agent_type._tensor, atol=1e-4) - self.assertIsInstance(agent_type.to_raw(), Image.Image) + assert isinstance(agent_type.to_raw(), PIL.Image.Image) # Ensure the path remains even after the object deletion del agent_type - self.assertTrue(os.path.exists(path)) + assert os.path.exists(path) - def test_from_string(self): - path = Path("tests/fixtures/000000039769.png") - image = Image.open(path) + def test_from_string(self, shared_datadir): + path = shared_datadir / "000000039769.png" + image = PIL.Image.open(path) agent_type = AgentImage(path) - self.assertTrue(path.samefile(agent_type.to_string())) - self.assertTrue(image == agent_type.to_raw()) + assert path.samefile(agent_type.to_string()) + assert image == agent_type.to_raw() # Ensure the path remains even after the object deletion 
del agent_type - self.assertTrue(os.path.exists(path)) + assert os.path.exists(path) - def test_from_image(self): - path = Path("tests/fixtures/000000039769.png") - image = Image.open(path) + def test_from_image(self, shared_datadir): + path = shared_datadir / "000000039769.png" + image = PIL.Image.open(path) agent_type = AgentImage(image) - self.assertFalse(path.samefile(agent_type.to_string())) - self.assertTrue(image == agent_type.to_raw()) + assert not path.samefile(agent_type.to_string()) + assert image == agent_type.to_raw() # Ensure the path remains even after the object deletion del agent_type - self.assertTrue(os.path.exists(path)) + assert os.path.exists(path) class AgentTextTests(unittest.TestCase): diff --git a/tests/test_utils.py b/tests/test_utils.py index 16ba39141..d4aa11970 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -14,8 +14,6 @@ # limitations under the License. import inspect import os -import pathlib -import tempfile import textwrap import unittest @@ -24,7 +22,75 @@ from smolagents import Tool from smolagents.tools import tool -from smolagents.utils import get_source, parse_code_blobs +from smolagents.utils import get_source, instance_to_source, is_valid_name, parse_code_blobs, parse_json_blob + + +class ValidTool(Tool): + name = "valid_tool" + description = "A valid tool" + inputs = {"input": {"type": "string", "description": "input"}} + output_type = "string" + simple_attr = "string" + dict_attr = {"key": "value"} + + def __init__(self, optional_param="default"): + super().__init__() + self.param = optional_param + + def forward(self, input: str) -> str: + return input.upper() + + +@tool +def valid_tool_function(input: str) -> str: + """A valid tool function. + + Args: + input (str): Input string. 
+ """ + return input.upper() + + +VALID_TOOL_SOURCE = """\ +from smolagents.tools import Tool + +class ValidTool(Tool): + name = "valid_tool" + description = "A valid tool" + inputs = {'input': {'type': 'string', 'description': 'input'}} + output_type = "string" + simple_attr = "string" + dict_attr = {'key': 'value'} + + def __init__(self, optional_param="default"): + super().__init__() + self.param = optional_param + + def forward(self, input: str) -> str: + return input.upper() +""" + +VALID_TOOL_FUNCTION_SOURCE = '''\ +from smolagents.tools import Tool + +class SimpleTool(Tool): + name = "valid_tool_function" + description = "A valid tool function." + inputs = {'input': {'type': 'string', 'description': 'Input string.'}} + output_type = "string" + + def __init__(self): + self.is_initialized = True + + @tool + def valid_tool_function(input: str) -> str: + """A valid tool function. + + Args: + input (str): Input string. + """ + return input.upper() +''' class AgentTextTests(unittest.TestCase): @@ -47,25 +113,14 @@ def test_parse_code_blobs(self): output = parse_code_blobs(code_blob) assert output == code_blob - def test_multiple_code_blobs(self): - test_input = """Here's a function that adds numbers: -```python -def add(a, b): - return a + b -``` -And here's a function that multiplies them: -```py -def multiply(a, b): - return a * b -```""" - - expected_output = """def add(a, b): - return a + b + # Allow whitespaces after header + output = parse_code_blobs("```py \ncode_a\n````") + assert output == "code_a" -def multiply(a, b): - return a * b""" + def test_multiple_code_blobs(self): + test_input = "```\nFoo\n```\n\n```py\ncode_a\n````\n\n```python\ncode_b\n```" result = parse_code_blobs(test_input) - assert result == expected_output + assert result == "Foo\n\ncode_a\n\ncode_b" @pytest.fixture(scope="function") @@ -127,7 +182,15 @@ def test_get_source_ipython_errors_type_error(): get_source(None) -def test_e2e_class_tool_save(): +@pytest.mark.parametrize( + "tool, 
expected_tool_source", [(ValidTool(), VALID_TOOL_SOURCE), (valid_tool_function, VALID_TOOL_FUNCTION_SOURCE)] +) +def test_instance_to_source(tool, expected_tool_source): + tool_source = instance_to_source(tool, base_cls=Tool) + assert tool_source == expected_tool_source + + +def test_e2e_class_tool_save(tmp_path): class TestTool(Tool): name = "test_tool" description = "Test tool description" @@ -145,48 +208,46 @@ def forward(self, task: str): return task test_tool = TestTool() - with tempfile.TemporaryDirectory() as tmp_dir: - test_tool.save(tmp_dir, make_gradio_app=True) - assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"} - assert ( - pathlib.Path(tmp_dir, "tool.py").read_text() - == """from typing import Any, Optional -from smolagents.tools import Tool -import IPython - -class TestTool(Tool): - name = "test_tool" - description = "Test tool description" - inputs = {'task': {'type': 'string', 'description': 'tool input'}} - output_type = "string" - - def forward(self, task: str): - import IPython # noqa: F401 + test_tool.save(tmp_path, make_gradio_app=True) + assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"} + assert (tmp_path / "tool.py").read_text() == textwrap.dedent( + """\ + from typing import Any, Optional + from smolagents.tools import Tool + import IPython - return task + class TestTool(Tool): + name = "test_tool" + description = "Test tool description" + inputs = {'task': {'type': 'string', 'description': 'tool input'}} + output_type = "string" - def __init__(self, *args, **kwargs): - self.is_initialized = False -""" - ) - requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split()) - assert requirements == {"IPython", "smolagents"} - assert ( - pathlib.Path(tmp_dir, "app.py").read_text() - == """from smolagents import launch_gradio_demo -from tool import TestTool + def forward(self, task: str): + import IPython # noqa: F401 -tool = TestTool() + return task 
-launch_gradio_demo(tool) -""" - ) + def __init__(self, *args, **kwargs): + self.is_initialized = False + """ + ) + requirements = set((tmp_path / "requirements.txt").read_text().split()) + assert requirements == {"IPython", "smolagents"} + assert (tmp_path / "app.py").read_text() == textwrap.dedent( + """\ + from smolagents import launch_gradio_demo + from tool import TestTool + + tool = TestTool() + launch_gradio_demo(tool) + """ + ) -def test_e2e_ipython_class_tool_save(): +def test_e2e_ipython_class_tool_save(tmp_path): shell = InteractiveShell.instance() - with tempfile.TemporaryDirectory() as tmp_dir: - code_blob = textwrap.dedent(f""" + code_blob = textwrap.dedent( + f"""\ from smolagents.tools import Tool class TestTool(Tool): name = "test_tool" @@ -201,46 +262,46 @@ def forward(self, task: str): import IPython # noqa: F401 return task - TestTool().save("{tmp_dir}", make_gradio_app=True) - """) - assert shell.run_cell(code_blob, store_history=True).success - assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"} - assert ( - pathlib.Path(tmp_dir, "tool.py").read_text() - == """from typing import Any, Optional -from smolagents.tools import Tool -import IPython - -class TestTool(Tool): - name = "test_tool" - description = "Test tool description" - inputs = {'task': {'type': 'string', 'description': 'tool input'}} - output_type = "string" - - def forward(self, task: str): - import IPython # noqa: F401 + TestTool().save("{tmp_path}", make_gradio_app=True) + """ + ) + assert shell.run_cell(code_blob, store_history=True).success + assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"} + assert (tmp_path / "tool.py").read_text() == textwrap.dedent( + """\ + from typing import Any, Optional + from smolagents.tools import Tool + import IPython - return task + class TestTool(Tool): + name = "test_tool" + description = "Test tool description" + inputs = {'task': {'type': 'string', 'description': 'tool input'}} + output_type 
= "string" - def __init__(self, *args, **kwargs): - self.is_initialized = False -""" - ) - requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split()) - assert requirements == {"IPython", "smolagents"} - assert ( - pathlib.Path(tmp_dir, "app.py").read_text() - == """from smolagents import launch_gradio_demo -from tool import TestTool + def forward(self, task: str): + import IPython # noqa: F401 -tool = TestTool() + return task -launch_gradio_demo(tool) -""" - ) + def __init__(self, *args, **kwargs): + self.is_initialized = False + """ + ) + requirements = set((tmp_path / "requirements.txt").read_text().split()) + assert requirements == {"IPython", "smolagents"} + assert (tmp_path / "app.py").read_text() == textwrap.dedent( + """\ + from smolagents import launch_gradio_demo + from tool import TestTool + + tool = TestTool() + launch_gradio_demo(tool) + """ + ) -def test_e2e_function_tool_save(): +def test_e2e_function_tool_save(tmp_path): @tool def test_tool(task: str) -> str: """ @@ -253,49 +314,47 @@ def test_tool(task: str) -> str: return task - with tempfile.TemporaryDirectory() as tmp_dir: - test_tool.save(tmp_dir, make_gradio_app=True) - assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"} - assert ( - pathlib.Path(tmp_dir, "tool.py").read_text() - == """from smolagents import Tool -from typing import Any, Optional + test_tool.save(tmp_path, make_gradio_app=True) + assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"} + assert (tmp_path / "tool.py").read_text() == textwrap.dedent( + """\ + from smolagents import Tool + from typing import Any, Optional -class SimpleTool(Tool): - name = "test_tool" - description = "Test tool description" - inputs = {"task":{"type":"string","description":"tool input"}} - output_type = "string" - - def forward(self, task: str) -> str: - \""" - Test tool description - - Args: - task: tool input - \""" - import IPython # noqa: F401 + class SimpleTool(Tool): + name 
= "test_tool" + description = "Test tool description" + inputs = {'task': {'type': 'string', 'description': 'tool input'}} + output_type = "string" - return task""" - ) - requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split()) - assert requirements == {"smolagents"} # FIXME: IPython should be in the requirements - assert ( - pathlib.Path(tmp_dir, "app.py").read_text() - == """from smolagents import launch_gradio_demo -from tool import SimpleTool + def forward(self, task: str) -> str: + \""" + Test tool description -tool = SimpleTool() + Args: + task: tool input + \""" + import IPython # noqa: F401 -launch_gradio_demo(tool) -""" - ) + return task""" + ) + requirements = set((tmp_path / "requirements.txt").read_text().split()) + assert requirements == {"smolagents"} # FIXME: IPython should be in the requirements + assert (tmp_path / "app.py").read_text() == textwrap.dedent( + """\ + from smolagents import launch_gradio_demo + from tool import SimpleTool + + tool = SimpleTool() + launch_gradio_demo(tool) + """ + ) -def test_e2e_ipython_function_tool_save(): +def test_e2e_ipython_function_tool_save(tmp_path): shell = InteractiveShell.instance() - with tempfile.TemporaryDirectory() as tmp_dir: - code_blob = textwrap.dedent(f""" + code_blob = textwrap.dedent( + f""" from smolagents import tool @tool @@ -310,41 +369,129 @@ def test_tool(task: str) -> str: return task - test_tool.save("{tmp_dir}", make_gradio_app=True) - """) - assert shell.run_cell(code_blob, store_history=True).success - assert set(os.listdir(tmp_dir)) == {"requirements.txt", "app.py", "tool.py"} - assert ( - pathlib.Path(tmp_dir, "tool.py").read_text() - == """from smolagents import Tool -from typing import Any, Optional + test_tool.save("{tmp_path}", make_gradio_app=True) + """ + ) + assert shell.run_cell(code_blob, store_history=True).success + assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"} + assert (tmp_path / "tool.py").read_text() == 
textwrap.dedent( + """\ + from smolagents import Tool + from typing import Any, Optional + + class SimpleTool(Tool): + name = "test_tool" + description = "Test tool description" + inputs = {'task': {'type': 'string', 'description': 'tool input'}} + output_type = "string" -class SimpleTool(Tool): - name = "test_tool" - description = "Test tool description" - inputs = {"task":{"type":"string","description":"tool input"}} - output_type = "string" + def forward(self, task: str) -> str: + \""" + Test tool description - def forward(self, task: str) -> str: - \""" - Test tool description + Args: + task: tool input + \""" + import IPython # noqa: F401 - Args: - task: tool input - \""" - import IPython # noqa: F401 + return task""" + ) + requirements = set((tmp_path / "requirements.txt").read_text().split()) + assert requirements == {"smolagents"} # FIXME: IPython should be in the requirements + assert (tmp_path / "app.py").read_text() == textwrap.dedent( + """\ + from smolagents import launch_gradio_demo + from tool import SimpleTool + + tool = SimpleTool() + launch_gradio_demo(tool) + """ + ) + + +@pytest.mark.parametrize( + "raw_json, expected_data, expected_blob", + [ + ( + """{}""", + {}, + "", + ), + ( + """Text{}""", + {}, + "Text", + ), + ( + """{"simple": "json"}""", + {"simple": "json"}, + "", + ), + ( + """With text here{"simple": "json"}""", + {"simple": "json"}, + "With text here", + ), + ( + """{"simple": "json"}With text after""", + {"simple": "json"}, + "", + ), + ( + """With text before{"simple": "json"}And text after""", + {"simple": "json"}, + "With text before", + ), + ], +) +def test_parse_json_blob_with_valid_json(raw_json, expected_data, expected_blob): + data, blob = parse_json_blob(raw_json) - return task""" - ) - requirements = set(pathlib.Path(tmp_dir, "requirements.txt").read_text().split()) - assert requirements == {"smolagents"} # FIXME: IPython should be in the requirements - assert ( - pathlib.Path(tmp_dir, "app.py").read_text() - == """from 
smolagents import launch_gradio_demo -from tool import SimpleTool + assert data == expected_data + assert blob == expected_blob -tool = SimpleTool() -launch_gradio_demo(tool) -""" - ) +@pytest.mark.parametrize( + "raw_json", + [ + """simple": "json"}""", + """With text here"simple": "json"}""", + """{"simple": ""json"}With text after""", + """{"simple": "json"With text after""", + "}}", + ], +) +def test_parse_json_blob_with_invalid_json(raw_json): + with pytest.raises(Exception): + parse_json_blob(raw_json) + + +@pytest.mark.parametrize( + "name,expected", + [ + # Valid identifiers + ("valid_name", True), + ("ValidName", True), + ("valid123", True), + ("_private", True), + # Invalid identifiers + ("", False), + ("123invalid", False), + ("invalid-name", False), + ("invalid name", False), + ("invalid.name", False), + # Python keywords + ("if", False), + ("for", False), + ("class", False), + ("return", False), + # Non-string inputs + (123, False), + (None, False), + ([], False), + ({}, False), + ], +) +def test_is_valid_name(name, expected): + """Test the is_valid_name function with various inputs.""" + assert is_valid_name(name) is expected diff --git a/tests/utils/markers.py b/tests/utils/markers.py index 8901f5f25..5240f9880 100644 --- a/tests/utils/markers.py +++ b/tests/utils/markers.py @@ -15,8 +15,10 @@ """Markers for tests .""" import os +from importlib.util import find_spec import pytest require_run_all = pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="requires RUN_ALL environment variable") +require_torch = pytest.mark.skipif(find_spec("torch") is None, reason="requires torch") diff --git a/utils/check_tests_in_ci.py b/utils/check_tests_in_ci.py deleted file mode 100644 index b320e23e7..000000000 --- a/utils/check_tests_in_ci.py +++ /dev/null @@ -1,55 +0,0 @@ -# coding=utf-8 -# Copyright 2025-present, the HuggingFace Inc. team. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Check that all tests are called in CI.""" - -from pathlib import Path - - -ROOT = Path(__file__).parent.parent - -TESTS_FOLDER = ROOT / "tests" -CI_WORKFLOW_FILE = ROOT / ".github" / "workflows" / "tests.yml" - - -def check_tests_in_ci(): - """List all test files in `./tests/` and check if they are listed in the CI workflow. - - Since each test file is triggered separately in the CI workflow, it is easy to forget a new one when adding new - tests, hence this check. - - NOTE: current implementation is quite naive but should work for now. Must be updated if one want to ignore some - tests or if file naming is updated (currently only files starting by `test_*` are checked) - """ - test_files = [ - path.relative_to(TESTS_FOLDER).as_posix() - for path in TESTS_FOLDER.glob("**/*.py") - if path.name.startswith("test_") - ] - ci_workflow_file_content = CI_WORKFLOW_FILE.read_text() - missing_test_files = [test_file for test_file in test_files if test_file not in ci_workflow_file_content] - if missing_test_files: - print( - "โŒ Some test files seem to be ignored in the CI:\n" - + "\n".join(f" - {test_file}" for test_file in missing_test_files) - + f"\n Please add them manually in {CI_WORKFLOW_FILE}." - ) - exit(1) - else: - print("โœ… All good!") - exit(0) - - -if __name__ == "__main__": - check_tests_in_ci()