Skip to content

Commit b387ab1

Browse files
committed
Add evaluator subagent
1 parent 15ead0e commit b387ab1

File tree

4 files changed

+61
-0
lines changed

4 files changed

+61
-0
lines changed

apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/copilot/components/tool-call/tool-call.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,8 @@ function getSubagentLabels(toolName: string, isStreaming: boolean): string {
705705
return isStreaming ? 'Testing' : 'Tested'
706706
case 'deploy':
707707
return isStreaming ? 'Deploying' : 'Deployed'
708+
case 'evaluate':
709+
return isStreaming ? 'Evaluating' : 'Evaluated'
708710
case 'auth':
709711
return isStreaming ? 'Authenticating' : 'Authenticated'
710712
case 'research':
@@ -1487,6 +1489,7 @@ export function ToolCall({ toolCall: toolCallProp, toolCallId, onStateChange }:
14871489
'debug',
14881490
'test',
14891491
'deploy',
1492+
'evaluate',
14901493
'auth',
14911494
'research',
14921495
'knowledge',

apps/sim/lib/copilot/registry.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export const ToolIds = z.enum([
4242
'sleep',
4343
'get_block_outputs',
4444
'get_block_upstream_references',
45+
'evaluate',
4546
])
4647
export type ToolId = z.infer<typeof ToolIds>
4748

@@ -361,6 +362,10 @@ export const ToolArgSchemas = {
361362
'Array of block UUIDs. Returns all upstream references (block outputs and variables) accessible to each block based on workflow connections.'
362363
),
363364
}),
365+
366+
evaluate: z.object({
367+
instruction: z.string().describe('Instructions for what to evaluate'),
368+
}),
364369
} as const
365370
export type ToolArgSchemaMap = typeof ToolArgSchemas
366371

@@ -445,6 +450,7 @@ export const ToolSSESchemas = {
445450
'get_block_upstream_references',
446451
ToolArgSchemas.get_block_upstream_references
447452
),
453+
evaluate: toolCallSSEFor('evaluate', ToolArgSchemas.evaluate),
448454
} as const
449455
export type ToolSSESchemaMap = typeof ToolSSESchemas
450456

@@ -811,6 +817,10 @@ export const ToolResultSchemas = {
811817
})
812818
),
813819
}),
820+
evaluate: z.object({
821+
success: z.boolean(),
822+
message: z.string().optional(),
823+
}),
814824
} as const
815825
export type ToolResultSchemaMap = typeof ToolResultSchemas
816826

apps/sim/lib/copilot/tools/client/other/evaluate.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { ClipboardCheck, Loader2, XCircle } from 'lucide-react'
2+
import {
3+
BaseClientTool,
4+
type BaseClientToolMetadata,
5+
ClientToolCallState,
6+
} from '@/lib/copilot/tools/client/base-tool'
7+
8+
/** Arguments accepted by the evaluate tool call: a single free-form instruction string. */
interface EvaluateArgs {
9+
instruction: string
10+
}
11+
12+
/**
13+
* Client-side tool registered under the id 'evaluate', with per-state display labels and icons.
14+
* Its execute() only moves the call into the `executing` state; per the original author's note,
15+
* the evaluation is done by the evaluate subagent and its output is streamed as nested content (not shown in this file).
16+
*/
17+
export class EvaluateClientTool extends BaseClientTool {
18+
static readonly id = 'evaluate'
19+
20+
constructor(toolCallId: string) {
21+
super(toolCallId, EvaluateClientTool.id, EvaluateClientTool.metadata)
22+
}
23+
24+
static readonly metadata: BaseClientToolMetadata = {
25+
displayNames: { // one text/icon pair per ClientToolCallState; the three in-progress states share 'Evaluating' + Loader2
26+
[ClientToolCallState.generating]: { text: 'Evaluating', icon: Loader2 },
27+
[ClientToolCallState.pending]: { text: 'Evaluating', icon: Loader2 },
28+
[ClientToolCallState.executing]: { text: 'Evaluating', icon: Loader2 },
29+
[ClientToolCallState.success]: { text: 'Evaluated', icon: ClipboardCheck },
30+
[ClientToolCallState.error]: { text: 'Failed to evaluate', icon: XCircle },
31+
[ClientToolCallState.rejected]: { text: 'Evaluation skipped', icon: XCircle },
32+
[ClientToolCallState.aborted]: { text: 'Evaluation aborted', icon: XCircle },
33+
},
34+
}
35+
36+
/**
37+
* Marks this tool call as executing; `_args` is accepted but never read.
38+
* No evaluation logic runs here - per the original author's note, that work is done server-side
39+
* by the evaluate subagent, and its output arrives as streamed subagent events (not shown in this file).
40+
*/
41+
async execute(_args?: EvaluateArgs): Promise<void> {
42+
this.setState(ClientToolCallState.executing)
43+
}
44+
}
45+

apps/sim/stores/panel/copilot/store.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import { CustomToolClientTool } from '@/lib/copilot/tools/client/other/custom-to
3131
import { DebugClientTool } from '@/lib/copilot/tools/client/other/debug'
3232
import { DeployClientTool } from '@/lib/copilot/tools/client/other/deploy'
3333
import { EditClientTool } from '@/lib/copilot/tools/client/other/edit'
34+
import { EvaluateClientTool } from '@/lib/copilot/tools/client/other/evaluate'
3435
import { InfoClientTool } from '@/lib/copilot/tools/client/other/info'
3536
import { KnowledgeClientTool } from '@/lib/copilot/tools/client/other/knowledge'
3637
import { MakeApiRequestClientTool } from '@/lib/copilot/tools/client/other/make-api-request'
@@ -98,6 +99,7 @@ const CLIENT_TOOL_INSTANTIATORS: Record<string, (id: string) => any> = {
9899
debug: (id) => new DebugClientTool(id),
99100
test: (id) => new TestClientTool(id),
100101
deploy: (id) => new DeployClientTool(id),
102+
evaluate: (id) => new EvaluateClientTool(id),
101103
auth: (id) => new AuthClientTool(id),
102104
research: (id) => new ResearchClientTool(id),
103105
knowledge: (id) => new KnowledgeClientTool(id),
@@ -155,6 +157,7 @@ export const CLASS_TOOL_METADATA: Record<string, BaseClientToolMetadata | undefi
155157
debug: (DebugClientTool as any)?.metadata,
156158
test: (TestClientTool as any)?.metadata,
157159
deploy: (DeployClientTool as any)?.metadata,
160+
evaluate: (EvaluateClientTool as any)?.metadata,
158161
auth: (AuthClientTool as any)?.metadata,
159162
research: (ResearchClientTool as any)?.metadata,
160163
knowledge: (KnowledgeClientTool as any)?.metadata,

0 commit comments

Comments
 (0)