Skip to content

Commit 3ebe9f0

Browse files
authored
[Agents] Computer Use Tool (#42483)
* initial code gen * update sample * start merge with main * Update computer use sample, add fake screenshots * run black * remove double space * sample comments * changelog * add note to sample on approval requirement * fix mypy issue * add computer use test * fix mypy
1 parent b87b4ac commit 3ebe9f0

17 files changed

+1339
-49
lines changed

sdk/ai/azure-ai-agents/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
### Features Added
1010

11+
- Added Computer Use Preview tool for use with the `computer-use-preview` model.
1112
- Added static `merge_resources` method to `McpTool` with accompanying sample.
1213

1314
### Bugs Fixed
@@ -18,6 +19,7 @@
1819

1920
### Sample updates
2021

22+
- Added Computer Use Preview tool sample demonstrating use with the `computer-use-preview` model.
2123
- Added sample demonstrating multiple McpTool instance usage.
2224

2325
## 1.2.0b3 (2025-08-22)

sdk/ai/azure-ai-agents/apiview-properties.json

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,23 @@
3030
"azure.ai.agents.models.BrowserAutomationToolConnectionParameters": "Azure.AI.Agents.BrowserAutomationToolConnectionParameters",
3131
"azure.ai.agents.models.BrowserAutomationToolDefinition": "Azure.AI.Agents.BrowserAutomationToolDefinition",
3232
"azure.ai.agents.models.BrowserAutomationToolParameters": "Azure.AI.Agents.BrowserAutomationToolParameters",
33+
"azure.ai.agents.models.ComputerUseAction": "Azure.AI.Agents.ComputerUseAction",
34+
"azure.ai.agents.models.ClickAction": "Azure.AI.Agents.ClickAction",
3335
"azure.ai.agents.models.CodeInterpreterToolDefinition": "Azure.AI.Agents.CodeInterpreterToolDefinition",
3436
"azure.ai.agents.models.CodeInterpreterToolResource": "Azure.AI.Agents.CodeInterpreterToolResource",
37+
"azure.ai.agents.models.ComputerScreenshot": "Azure.AI.Agents.ComputerScreenshot",
38+
"azure.ai.agents.models.ToolOutput": "Azure.AI.Agents.ToolOutput",
39+
"azure.ai.agents.models.ComputerToolOutput": "Azure.AI.Agents.ComputerToolOutput",
40+
"azure.ai.agents.models.ComputerUseToolDefinition": "Azure.AI.Agents.ComputerUseToolDefinition",
41+
"azure.ai.agents.models.ComputerUseToolParameters": "Azure.AI.Agents.ComputerUseToolParameters",
3542
"azure.ai.agents.models.ConnectedAgentDetails": "Azure.AI.Agents.ConnectedAgentDetails",
3643
"azure.ai.agents.models.ConnectedAgentToolDefinition": "Azure.AI.Agents.ConnectedAgentToolDefinition",
44+
"azure.ai.agents.models.CoordinatePoint": "Azure.AI.Agents.CoordinatePoint",
3745
"azure.ai.agents.models.DeepResearchBingGroundingConnection": "Azure.AI.Agents.DeepResearchBingGroundingConnection",
3846
"azure.ai.agents.models.DeepResearchDetails": "Azure.AI.Agents.DeepResearchDetails",
3947
"azure.ai.agents.models.DeepResearchToolDefinition": "Azure.AI.Agents.DeepResearchToolDefinition",
48+
"azure.ai.agents.models.DoubleClickAction": "Azure.AI.Agents.DoubleClickAction",
49+
"azure.ai.agents.models.DragAction": "Azure.AI.Agents.DragAction",
4050
"azure.ai.agents.models.FabricDataAgentToolParameters": "Azure.AI.Agents.FabricDataAgentToolParameters",
4151
"azure.ai.agents.models.FileInfo": "Azure.AI.Agents.FileInfo",
4252
"azure.ai.agents.models.FileListResponse": "Azure.AI.Agents.FileListResponse",
@@ -49,7 +59,9 @@
4959
"azure.ai.agents.models.FunctionDefinition": "Azure.AI.Agents.FunctionDefinition",
5060
"azure.ai.agents.models.FunctionName": "Azure.AI.Agents.FunctionName",
5161
"azure.ai.agents.models.FunctionToolDefinition": "Azure.AI.Agents.FunctionToolDefinition",
62+
"azure.ai.agents.models.FunctionToolOutput": "Azure.AI.Agents.FunctionToolOutput",
5263
"azure.ai.agents.models.IncompleteRunDetails": "Azure.AI.Agents.IncompleteRunDetails",
64+
"azure.ai.agents.models.KeyPressAction": "Azure.AI.Agents.KeyPressAction",
5365
"azure.ai.agents.models.MCPApprovalPerTool": "Azure.AI.Agents.MCPApprovalPerTool",
5466
"azure.ai.agents.models.MCPToolDefinition": "Azure.AI.Agents.MCPToolDefinition",
5567
"azure.ai.agents.models.MCPToolList": "Azure.AI.Agents.MCPToolList",
@@ -89,6 +101,7 @@
89101
"azure.ai.agents.models.MessageTextUrlCitationAnnotation": "Azure.AI.Agents.MessageTextUrlCitationAnnotation",
90102
"azure.ai.agents.models.MessageTextUrlCitationDetails": "Azure.AI.Agents.MessageTextUrlCitationDetails",
91103
"azure.ai.agents.models.MicrosoftFabricToolDefinition": "Azure.AI.Agents.MicrosoftFabricToolDefinition",
104+
"azure.ai.agents.models.MoveAction": "Azure.AI.Agents.MoveAction",
92105
"azure.ai.agents.models.OpenApiAuthDetails": "Azure.AI.Agents.OpenApiAuthDetails",
93106
"azure.ai.agents.models.OpenApiAnonymousAuthDetails": "Azure.AI.Agents.OpenApiAnonymousAuthDetails",
94107
"azure.ai.agents.models.OpenApiConnectionAuthDetails": "Azure.AI.Agents.OpenApiConnectionAuthDetails",
@@ -99,6 +112,8 @@
99112
"azure.ai.agents.models.OpenApiToolDefinition": "Azure.AI.Agents.OpenApiToolDefinition",
100113
"azure.ai.agents.models.RequiredAction": "Azure.AI.Agents.RequiredAction",
101114
"azure.ai.agents.models.RequiredToolCall": "Azure.AI.Agents.RequiredToolCall",
115+
"azure.ai.agents.models.RequiredComputerUseToolCall": "Azure.AI.Agents.RequiredComputerUseToolCall",
116+
"azure.ai.agents.models.RequiredComputerUseToolCallDetails": "Azure.AI.Agents.RequiredComputerUseToolCallDetails",
102117
"azure.ai.agents.models.RequiredFunctionToolCall": "Azure.AI.Agents.RequiredFunctionToolCall",
103118
"azure.ai.agents.models.RequiredFunctionToolCallDetails": "Azure.AI.Agents.RequiredFunctionToolCallDetails",
104119
"azure.ai.agents.models.RequiredMcpToolCall": "Azure.AI.Agents.RequiredMcpToolCall",
@@ -122,6 +137,8 @@
122137
"azure.ai.agents.models.RunStepCodeInterpreterToolCall": "Azure.AI.Agents.RunStepCodeInterpreterToolCall",
123138
"azure.ai.agents.models.RunStepCodeInterpreterToolCallDetails": "Azure.AI.Agents.RunStepCodeInterpreterToolCallDetails",
124139
"azure.ai.agents.models.RunStepCompletionUsage": "Azure.AI.Agents.RunStepCompletionUsage",
140+
"azure.ai.agents.models.RunStepComputerUseToolCall": "Azure.AI.Agents.RunStepComputerUseToolCall",
141+
"azure.ai.agents.models.RunStepComputerUseToolCallDetails": "Azure.AI.Agents.RunStepComputerUseToolCallDetails",
125142
"azure.ai.agents.models.RunStepConnectedAgent": "Azure.AI.Agents.RunStepConnectedAgent",
126143
"azure.ai.agents.models.RunStepConnectedAgentToolCall": "Azure.AI.Agents.RunStepConnectedAgentToolCall",
127144
"azure.ai.agents.models.RunStepDeepResearchToolCall": "Azure.AI.Agents.RunStepDeepResearchToolCall",
@@ -168,6 +185,9 @@
168185
"azure.ai.agents.models.RunStepOpenAPIToolCall": "Azure.AI.Agents.RunStepOpenAPIToolCall",
169186
"azure.ai.agents.models.RunStepSharepointToolCall": "Azure.AI.Agents.RunStepSharepointToolCall",
170187
"azure.ai.agents.models.RunStepToolCallDetails": "Azure.AI.Agents.RunStepToolCallDetails",
188+
"azure.ai.agents.models.SafetyCheck": "Azure.AI.Agents.SafetyCheck",
189+
"azure.ai.agents.models.ScreenshotAction": "Azure.AI.Agents.ScreenshotAction",
190+
"azure.ai.agents.models.ScrollAction": "Azure.AI.Agents.ScrollAction",
171191
"azure.ai.agents.models.SharepointGroundingToolParameters": "Azure.AI.Agents.SharepointGroundingToolParameters",
172192
"azure.ai.agents.models.SharepointToolDefinition": "Azure.AI.Agents.SharepointToolDefinition",
173193
"azure.ai.agents.models.SubmitToolApprovalAction": "Azure.AI.Agents.SubmitToolApprovalAction",
@@ -179,9 +199,9 @@
179199
"azure.ai.agents.models.ThreadRun": "Azure.AI.Agents.ThreadRun",
180200
"azure.ai.agents.models.ToolApproval": "Azure.AI.Agents.ToolApproval",
181201
"azure.ai.agents.models.ToolConnection": "Azure.AI.Agents.ToolConnection",
182-
"azure.ai.agents.models.ToolOutput": "Azure.AI.Agents.ToolOutput",
183202
"azure.ai.agents.models.ToolResources": "Azure.AI.Agents.ToolResources",
184203
"azure.ai.agents.models.TruncationObject": "Azure.AI.Agents.TruncationObject",
204+
"azure.ai.agents.models.TypeAction": "Azure.AI.Agents.TypeAction",
185205
"azure.ai.agents.models.VectorStore": "Azure.AI.Agents.VectorStore",
186206
"azure.ai.agents.models.VectorStoreChunkingStrategyRequest": "Azure.AI.Agents.VectorStoreChunkingStrategyRequest",
187207
"azure.ai.agents.models.VectorStoreAutoChunkingStrategyRequest": "Azure.AI.Agents.VectorStoreAutoChunkingStrategyRequest",
@@ -198,16 +218,19 @@
198218
"azure.ai.agents.models.VectorStoreStaticChunkingStrategyOptions": "Azure.AI.Agents.VectorStoreStaticChunkingStrategyOptions",
199219
"azure.ai.agents.models.VectorStoreStaticChunkingStrategyRequest": "Azure.AI.Agents.VectorStoreStaticChunkingStrategyRequest",
200220
"azure.ai.agents.models.VectorStoreStaticChunkingStrategyResponse": "Azure.AI.Agents.VectorStoreStaticChunkingStrategyResponse",
221+
"azure.ai.agents.models.WaitAction": "Azure.AI.Agents.WaitAction",
201222
"azure.ai.agents.models.VectorStoreDataSourceAssetType": "Azure.AI.Agents.VectorStoreDataSourceAssetType",
202223
"azure.ai.agents.models.AzureAISearchQueryType": "Azure.AI.Agents.AzureAISearchQueryType",
203224
"azure.ai.agents.models.MessageRole": "Azure.AI.Agents.MessageRole",
204225
"azure.ai.agents.models.MessageBlockType": "Azure.AI.Agents.MessageBlockType",
205226
"azure.ai.agents.models.ImageDetailLevel": "Azure.AI.Agents.ImageDetailLevel",
227+
"azure.ai.agents.models.ComputerUseEnvironment": "Azure.AI.Agents.ComputerUseEnvironment",
206228
"azure.ai.agents.models.OpenApiAuthType": "Azure.AI.Agents.OpenApiAuthType",
207229
"azure.ai.agents.models.ListSortOrder": "Azure.AI.Agents.ListSortOrder",
208230
"azure.ai.agents.models.MessageStatus": "Azure.AI.Agents.MessageStatus",
209231
"azure.ai.agents.models.MessageIncompleteDetailsReason": "Azure.AI.Agents.MessageIncompleteDetailsReason",
210232
"azure.ai.agents.models.RunStatus": "Azure.AI.Agents.RunStatus",
233+
"azure.ai.agents.models.MouseButton": "Azure.AI.Agents.MouseButton",
211234
"azure.ai.agents.models.IncompleteDetailsReason": "Azure.AI.Agents.IncompleteDetailsReason",
212235
"azure.ai.agents.models.TruncationStrategy": "Azure.AI.Agents.TruncationStrategy",
213236
"azure.ai.agents.models.AgentsToolChoiceOptionMode": "Azure.AI.Agents.AgentsToolChoiceOptionMode",

sdk/ai/azure-ai-agents/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/ai/azure-ai-agents",
5-
"Tag": "python/ai/azure-ai-agents_f5906767ec"
5+
"Tag": "python/ai/azure-ai-agents_ce4eea88ee"
66
}

sdk/ai/azure-ai-agents/azure/ai/agents/models/__init__.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,22 @@
4242
BrowserAutomationToolConnectionParameters,
4343
BrowserAutomationToolDefinition,
4444
BrowserAutomationToolParameters,
45+
ClickAction,
4546
CodeInterpreterToolDefinition,
4647
CodeInterpreterToolResource,
48+
ComputerScreenshot,
49+
ComputerToolOutput,
50+
ComputerUseAction,
51+
ComputerUseToolDefinition,
52+
ComputerUseToolParameters,
4753
ConnectedAgentDetails,
4854
ConnectedAgentToolDefinition,
55+
CoordinatePoint,
4956
DeepResearchBingGroundingConnection,
5057
DeepResearchDetails,
5158
DeepResearchToolDefinition,
59+
DoubleClickAction,
60+
DragAction,
5261
FabricDataAgentToolParameters,
5362
FileInfo,
5463
FileListResponse,
@@ -61,7 +70,9 @@
6170
FunctionDefinition,
6271
FunctionName,
6372
FunctionToolDefinition,
73+
FunctionToolOutput,
6474
IncompleteRunDetails,
75+
KeyPressAction,
6576
MCPApprovalPerTool,
6677
MCPToolDefinition,
6778
MCPToolList,
@@ -101,6 +112,7 @@
101112
MessageTextUrlCitationAnnotation,
102113
MessageTextUrlCitationDetails,
103114
MicrosoftFabricToolDefinition,
115+
MoveAction,
104116
OpenApiAnonymousAuthDetails,
105117
OpenApiAuthDetails,
106118
OpenApiConnectionAuthDetails,
@@ -110,6 +122,8 @@
110122
OpenApiManagedSecurityScheme,
111123
OpenApiToolDefinition,
112124
RequiredAction,
125+
RequiredComputerUseToolCall,
126+
RequiredComputerUseToolCallDetails,
113127
RequiredFunctionToolCall,
114128
RequiredFunctionToolCallDetails,
115129
RequiredMcpToolCall,
@@ -132,6 +146,8 @@
132146
RunStepCodeInterpreterToolCallDetails,
133147
RunStepCodeInterpreterToolCallOutput,
134148
RunStepCompletionUsage,
149+
RunStepComputerUseToolCall,
150+
RunStepComputerUseToolCallDetails,
135151
RunStepConnectedAgent,
136152
RunStepConnectedAgentToolCall,
137153
RunStepDeepResearchToolCall,
@@ -180,6 +196,9 @@
180196
RunStepSharepointToolCall,
181197
RunStepToolCall,
182198
RunStepToolCallDetails,
199+
SafetyCheck,
200+
ScreenshotAction,
201+
ScrollAction,
183202
SharepointGroundingToolParameters,
184203
SharepointToolDefinition,
185204
SubmitToolApprovalAction,
@@ -195,6 +214,7 @@
195214
ToolOutput,
196215
ToolResources,
197216
TruncationObject,
217+
TypeAction,
198218
VectorStore,
199219
VectorStoreAutoChunkingStrategyRequest,
200220
VectorStoreAutoChunkingStrategyResponse,
@@ -211,6 +231,7 @@
211231
VectorStoreStaticChunkingStrategyOptions,
212232
VectorStoreStaticChunkingStrategyRequest,
213233
VectorStoreStaticChunkingStrategyResponse,
234+
WaitAction,
214235
)
215236

216237
from ._enums import ( # type: ignore
@@ -219,6 +240,7 @@
219240
AgentsResponseFormatMode,
220241
AgentsToolChoiceOptionMode,
221242
AzureAISearchQueryType,
243+
ComputerUseEnvironment,
222244
DoneEvent,
223245
ErrorEvent,
224246
FilePurpose,
@@ -231,6 +253,7 @@
231253
MessageRole,
232254
MessageStatus,
233255
MessageStreamEvent,
256+
MouseButton,
234257
OpenApiAuthType,
235258
ResponseFormat,
236259
RunAdditionalFieldList,
@@ -285,13 +308,22 @@
285308
"BrowserAutomationToolConnectionParameters",
286309
"BrowserAutomationToolDefinition",
287310
"BrowserAutomationToolParameters",
311+
"ClickAction",
288312
"CodeInterpreterToolDefinition",
289313
"CodeInterpreterToolResource",
314+
"ComputerScreenshot",
315+
"ComputerToolOutput",
316+
"ComputerUseAction",
317+
"ComputerUseToolDefinition",
318+
"ComputerUseToolParameters",
290319
"ConnectedAgentDetails",
291320
"ConnectedAgentToolDefinition",
321+
"CoordinatePoint",
292322
"DeepResearchBingGroundingConnection",
293323
"DeepResearchDetails",
294324
"DeepResearchToolDefinition",
325+
"DoubleClickAction",
326+
"DragAction",
295327
"FabricDataAgentToolParameters",
296328
"FileInfo",
297329
"FileListResponse",
@@ -304,7 +336,9 @@
304336
"FunctionDefinition",
305337
"FunctionName",
306338
"FunctionToolDefinition",
339+
"FunctionToolOutput",
307340
"IncompleteRunDetails",
341+
"KeyPressAction",
308342
"MCPApprovalPerTool",
309343
"MCPToolDefinition",
310344
"MCPToolList",
@@ -344,6 +378,7 @@
344378
"MessageTextUrlCitationAnnotation",
345379
"MessageTextUrlCitationDetails",
346380
"MicrosoftFabricToolDefinition",
381+
"MoveAction",
347382
"OpenApiAnonymousAuthDetails",
348383
"OpenApiAuthDetails",
349384
"OpenApiConnectionAuthDetails",
@@ -353,6 +388,8 @@
353388
"OpenApiManagedSecurityScheme",
354389
"OpenApiToolDefinition",
355390
"RequiredAction",
391+
"RequiredComputerUseToolCall",
392+
"RequiredComputerUseToolCallDetails",
356393
"RequiredFunctionToolCall",
357394
"RequiredFunctionToolCallDetails",
358395
"RequiredMcpToolCall",
@@ -375,6 +412,8 @@
375412
"RunStepCodeInterpreterToolCallDetails",
376413
"RunStepCodeInterpreterToolCallOutput",
377414
"RunStepCompletionUsage",
415+
"RunStepComputerUseToolCall",
416+
"RunStepComputerUseToolCallDetails",
378417
"RunStepConnectedAgent",
379418
"RunStepConnectedAgentToolCall",
380419
"RunStepDeepResearchToolCall",
@@ -423,6 +462,9 @@
423462
"RunStepSharepointToolCall",
424463
"RunStepToolCall",
425464
"RunStepToolCallDetails",
465+
"SafetyCheck",
466+
"ScreenshotAction",
467+
"ScrollAction",
426468
"SharepointGroundingToolParameters",
427469
"SharepointToolDefinition",
428470
"SubmitToolApprovalAction",
@@ -438,6 +480,7 @@
438480
"ToolOutput",
439481
"ToolResources",
440482
"TruncationObject",
483+
"TypeAction",
441484
"VectorStore",
442485
"VectorStoreAutoChunkingStrategyRequest",
443486
"VectorStoreAutoChunkingStrategyResponse",
@@ -454,11 +497,13 @@
454497
"VectorStoreStaticChunkingStrategyOptions",
455498
"VectorStoreStaticChunkingStrategyRequest",
456499
"VectorStoreStaticChunkingStrategyResponse",
500+
"WaitAction",
457501
"AgentStreamEvent",
458502
"AgentsNamedToolChoiceType",
459503
"AgentsResponseFormatMode",
460504
"AgentsToolChoiceOptionMode",
461505
"AzureAISearchQueryType",
506+
"ComputerUseEnvironment",
462507
"DoneEvent",
463508
"ErrorEvent",
464509
"FilePurpose",
@@ -471,6 +516,7 @@
471516
"MessageRole",
472517
"MessageStatus",
473518
"MessageStreamEvent",
519+
"MouseButton",
474520
"OpenApiAuthType",
475521
"ResponseFormat",
476522
"RunAdditionalFieldList",

sdk/ai/azure-ai-agents/azure/ai/agents/models/_enums.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class AgentsNamedToolChoiceType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
3535
"""Tool type ``deep_research``"""
3636
MCP = "mcp"
3737
"""Tool type ``mcp``"""
38+
COMPUTER_USE_PREVIEW = "computer_use_preview"
39+
"""Tool type ``computer_use_preview``"""
3840

3941

4042
class AgentsResponseFormatMode(str, Enum, metaclass=CaseInsensitiveEnumMeta):
@@ -150,6 +152,19 @@ class AzureAISearchQueryType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
150152
"""Query type ``vector_semantic_hybrid``"""
151153

152154

155+
class ComputerUseEnvironment(str, Enum, metaclass=CaseInsensitiveEnumMeta):
156+
"""The environment types supported by the computer use tool."""
157+
158+
WINDOWS = "windows"
159+
"""Windows environment"""
160+
MAC = "mac"
161+
"""Mac environment"""
162+
LINUX = "linux"
163+
"""Linux environment"""
164+
BROWSER = "browser"
165+
"""Browser environment"""
166+
167+
153168
class DoneEvent(str, Enum, metaclass=CaseInsensitiveEnumMeta):
154169
"""Terminal event indicating the successful end of a stream."""
155170

@@ -299,6 +314,21 @@ class MessageStreamEvent(str, Enum, metaclass=CaseInsensitiveEnumMeta):
299314
"""Event sent before a message is completed. The data of this event is of type ThreadMessage"""
300315

301316

317+
class MouseButton(str, Enum, metaclass=CaseInsensitiveEnumMeta):
318+
"""The mouse button types supported by click actions."""
319+
320+
LEFT = "left"
321+
"""Left mouse button"""
322+
RIGHT = "right"
323+
"""Right mouse button"""
324+
WHEEL = "wheel"
325+
"""Mouse wheel button"""
326+
BACK = "back"
327+
"""Back mouse button"""
328+
FORWARD = "forward"
329+
"""Forward mouse button"""
330+
331+
302332
class OpenApiAuthType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
303333
"""Authentication type for OpenApi endpoint. Allowed types are:
304334

0 commit comments

Comments
 (0)