-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Add agent tools tests #44125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add agent tools tests #44125
Changes from all commits
6c01eda
86b7539
6e5bd03
728e798
7325d03
3f7d30f
0101ab2
601677a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # ------------------------------------ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT License. | ||
| # ------------------------------------ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| # pylint: disable=too-many-lines,line-too-long,useless-suppression | ||
| # ------------------------------------ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT License. | ||
| # ------------------------------------ | ||
| # cSpell:disable | ||
|
|
||
| """ | ||
| Multi-Tool Tests: Code Interpreter + Function Tool | ||
|
|
||
| Tests various scenarios using an agent with Code Interpreter and Function Tool. | ||
| All tests use the same tool combination but different inputs and workflows. | ||
| """ | ||
|
|
||
| import os | ||
| import json | ||
| import pytest | ||
| from test_base import TestBase, servicePreparer | ||
| from devtools_testutils import is_live_and_not_recording | ||
| from azure.ai.projects.models import PromptAgentDefinition, CodeInterpreterTool, CodeInterpreterToolAuto, FunctionTool | ||
| from openai.types.responses.response_input_param import FunctionCallOutput, ResponseInputParam | ||
|
|
||
|
|
||
class TestAgentCodeInterpreterAndFunction(TestBase):
    """Tests for agents using Code Interpreter + Function Tool combination.

    Each test creates an agent version, sends a single request through the
    OpenAI client, and deletes the agent version in a ``finally`` block so a
    failed request or assertion does not leave the agent behind.
    """

    @servicePreparer()
    @pytest.mark.skipif(
        condition=(not is_live_and_not_recording()),
        reason="Skipped because we cannot record network calls with OpenAI client",
    )
    def test_calculate_and_save(self, **kwargs):
        """
        Test calculation with Code Interpreter and saving with Function Tool.

        NOTE: the only assertion is that a response id came back; the test
        does not verify which tools the agent actually invoked.
        """

        model = self.test_agents_params["model_deployment_name"]

        # Setup
        project_client = self.create_client(operation_group="agents", **kwargs)
        openai_client = project_client.get_openai_client()

        # Define function tool
        func_tool = FunctionTool(
            name="save_result",
            description="Save analysis result",
            parameters={
                "type": "object",
                "properties": {
                    "result": {"type": "string", "description": "The result"},
                },
                "required": ["result"],
                "additionalProperties": False,
            },
            strict=True,
        )

        # Create agent
        agent = project_client.agents.create_version(
            agent_name="code-func-agent",
            definition=PromptAgentDefinition(
                model=model,
                instructions="Run calculations and save results.",
                tools=[
                    CodeInterpreterTool(container=CodeInterpreterToolAuto()),
                    func_tool,
                ],
            ),
            description="Agent with Code Interpreter and Function Tool.",
        )

        # Everything after creation runs under try/finally so the agent
        # version is removed even when the request or an assertion fails.
        try:
            print(f"Agent created (id: {agent.id})")

            # Use the agent
            response = openai_client.responses.create(
                input="Calculate 5 + 3 and save the result.",
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
            )
            print(f"Response received (id: {response.id})")

            assert response.id is not None
            print("✓ Code Interpreter + Function Tool works!")
        finally:
            # Cleanup
            project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)

    @servicePreparer()
    @pytest.mark.skipif(
        condition=(not is_live_and_not_recording()),
        reason="Skipped because we cannot record network calls with OpenAI client",
    )
    def test_generate_data_and_report(self, **kwargs):
        """
        Test generating data with Code Interpreter and reporting with Function.

        NOTE: the only assertion is that a response id came back; the test
        does not verify that ``generate_report`` was actually called.
        """

        model = self.test_agents_params["model_deployment_name"]

        # Setup
        project_client = self.create_client(operation_group="agents", **kwargs)
        openai_client = project_client.get_openai_client()

        # Define function tool
        report_function = FunctionTool(
            name="generate_report",
            description="Generate a report with the provided data",
            parameters={
                "type": "object",
                "properties": {
                    "title": {"type": "string", "description": "Report title"},
                    "summary": {"type": "string", "description": "Report summary"},
                },
                "required": ["title", "summary"],
                "additionalProperties": False,
            },
            strict=True,
        )

        # Create agent
        agent = project_client.agents.create_version(
            agent_name="code-func-report-agent",
            definition=PromptAgentDefinition(
                model=model,
                instructions="Generate data using code and create reports with the generate_report function.",
                tools=[
                    CodeInterpreterTool(container=CodeInterpreterToolAuto()),
                    report_function,
                ],
            ),
            description="Agent for data generation and reporting.",
        )

        # Guarantee cleanup of the agent version regardless of test outcome.
        try:
            print(f"Agent created (id: {agent.id})")

            # Request data generation and report
            response = openai_client.responses.create(
                input="Generate a list of 10 random numbers between 1 and 100, calculate their average, and create a report.",
                extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
            )

            print(f"Response received (id: {response.id})")
            assert response.id is not None
            print("✓ Data generation and reporting works!")
        finally:
            # Cleanup
            project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,156 @@ | ||||||
| # pylint: disable=too-many-lines,line-too-long,useless-suppression | ||||||
| # ------------------------------------ | ||||||
| # Copyright (c) Microsoft Corporation. | ||||||
| # Licensed under the MIT License. | ||||||
| # ------------------------------------ | ||||||
| # cSpell:disable | ||||||
|
|
||||||
| """ | ||||||
| Multi-Tool Tests: File Search + Code Interpreter | ||||||
|
|
||||||
| Tests various scenarios using an agent with File Search and Code Interpreter. | ||||||
| All tests use the same tool combination but different inputs and workflows. | ||||||
| """ | ||||||
|
|
||||||
| import os | ||||||
| import pytest | ||||||
| from io import BytesIO | ||||||
| from test_base import TestBase, servicePreparer | ||||||
| from devtools_testutils import is_live_and_not_recording | ||||||
| from azure.ai.projects.models import PromptAgentDefinition, FileSearchTool, CodeInterpreterTool, CodeInterpreterToolAuto | ||||||
|
|
||||||
|
|
||||||
| class TestAgentFileSearchAndCodeInterpreter(TestBase): | ||||||
| """Tests for agents using File Search + Code Interpreter combination.""" | ||||||
|
|
||||||
@servicePreparer()
@pytest.mark.skipif(
    condition=(not is_live_and_not_recording()),
    reason="Skipped because we cannot record network calls with OpenAI client",
)
def test_find_and_analyze_data(self, **kwargs):
    """
    Test finding data with File Search and analyzing with Code Interpreter.

    Creates a vector store, uploads a small in-memory text file to it, creates
    an agent with File Search and Code Interpreter tools, and sends it a single
    request. The agent version and the vector store are deleted in ``finally``
    blocks so a failed upload, request, or assertion does not leave them behind.
    """

    model = self.test_agents_params["model_deployment_name"]

    # Setup
    project_client = self.create_client(operation_group="agents", **kwargs)
    openai_client = project_client.get_openai_client()

    # Create data file
    txt_content = "Sample data: 10, 20, 30, 40, 50"
    vector_store = openai_client.vector_stores.create(name="DataStore")

    # Stays None until the agent version exists, so cleanup knows whether
    # there is anything to delete.
    agent = None
    try:
        txt_file = BytesIO(txt_content.encode("utf-8"))
        txt_file.name = "data.txt"

        file = openai_client.vector_stores.files.upload_and_poll(
            vector_store_id=vector_store.id,
            file=txt_file,
        )
        print(f"File uploaded (id: {file.id})")

        # Create agent
        agent = project_client.agents.create_version(
            agent_name="file-search-code-agent",
            definition=PromptAgentDefinition(
                model=model,
                instructions="Find data and analyze it.",
                tools=[
                    FileSearchTool(vector_store_ids=[vector_store.id]),
                    CodeInterpreterTool(container=CodeInterpreterToolAuto()),
                ],
            ),
            description="Agent with File Search and Code Interpreter.",
        )
        print(f"Agent created (id: {agent.id})")

        # Use the agent
        response = openai_client.responses.create(
            input="Find the data file and calculate the average.",
            extra_body={"agent": {"name": agent.name, "type": "agent_reference"}},
        )
        print(f"Response received (id: {response.id})")

        assert response.id is not None
        assert len(response.output_text) > 20
        print("✓ File Search + Code Interpreter works!")
    finally:
        # Cleanup: nested so the vector store is deleted even if deleting
        # the agent version raises.
        try:
            if agent is not None:
                project_client.agents.delete_version(agent_name=agent.name, agent_version=agent.version)
        finally:
            openai_client.vector_stores.delete(vector_store.id)
|
|
||||||
| @servicePreparer() | ||||||
| @pytest.mark.skipif( | ||||||
| condition=(not is_live_and_not_recording()), | ||||||
| reason="Skipped because we cannot record network calls with OpenAI client", | ||||||
| ) | ||||||
| def test_analyze_code_file(self, **kwargs): | ||||||
| """ | ||||||
| Test finding code file and analyzing it. | ||||||
| """ | ||||||
|
|
||||||
| model = self.test_agents_params["model_deployment_name"] | ||||||
|
|
||||||
| # Setup | ||||||
| project_client = self.create_client(operation_group="agents", **kwargs) | ||||||
| openai_client = project_client.get_openai_client() | ||||||
|
|
||||||
| # Create Python code file | ||||||
| python_code = """def fibonacci(n): | ||||||
| if n <= 1: | ||||||
| return n | ||||||
| return fibonacci(n-1) + fibonacci(n-2) | ||||||
|
|
||||||
| result = fibonacci(10) | ||||||
| print(f"Fibonacci(10) = {result}") | ||||||
| """ | ||||||
|
|
||||||
| vector_store = openai_client.vector_stores.create(name="CodeAnalysisStore") | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After the test is done, you need to delete the vector store.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is deleted on line 156. It does have the problem of not being deleted if the test fails, but as discussed in another comment, this is a bigger issue across the whole test suite. |
||||||
|
|
||||||
| code_file = BytesIO(python_code.encode("utf-8")) | ||||||
| code_file.name = "fibonacci.py" | ||||||
|
|
||||||
| file = openai_client.vector_stores.files.upload_and_poll( | ||||||
| vector_store_id=vector_store.id, | ||||||
| file=code_file, | ||||||
| ) | ||||||
| print(f"Code file uploaded (id: {file.id})") | ||||||
|
|
||||||
| # Create agent | ||||||
| agent = project_client.agents.create_version( | ||||||
| agent_name="file-search-code-analysis-agent", | ||||||
| definition=PromptAgentDefinition( | ||||||
| model=model, | ||||||
| instructions="Find code files and analyze them. You can run code to test it.", | ||||||
| tools=[ | ||||||
| FileSearchTool(vector_store_ids=[vector_store.id]), | ||||||
| CodeInterpreterTool(container=CodeInterpreterToolAuto()), | ||||||
| ], | ||||||
| ), | ||||||
| description="Agent for code analysis.", | ||||||
| ) | ||||||
| print(f"Agent created (id: {agent.id})") | ||||||
|
|
||||||
| # Request analysis | ||||||
| response = openai_client.responses.create( | ||||||
| input="Find the fibonacci code and explain what it does. What is the computational complexity?", | ||||||
| extra_body={"agent": {"name": agent.name, "type": "agent_reference"}}, | ||||||
| ) | ||||||
|
|
||||||
| response_text = response.output_text | ||||||
| print(f"Response: {response_text[:300]}...") | ||||||
|
|
||||||
| assert len(response_text) > 50 | ||||||
| response_lower = response_lower = response_text.lower() | ||||||
|
||||||
| response_lower = response_lower = response_text.lower() | |
| response_lower = response_text.lower() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized our agents SDK has the following problem, and the same applies here. I might want to do this as a new PR:
If an assertion fails, the delete won't be executed. Perhaps we can have a decorator that wraps the test in a try/except/finally. In the finally block, delete the agent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I noticed this. I agree it should be done in another PR if we are going to do it. One thing I will say is that the current behavior is handy when running locally, because when a test fails, I still have the agent. I can then debug further by sending it more requests, including from the playground. I think whichever way we pick, it should be consistent across the tests.
Uh oh!
There was an error while loading. Please reload this page.