Add evaluator subagent

Sg312 · Sg312 · commit b387ab12f3f6 · 2026-01-08T17:55:01.000-08:00
diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/copilot/components/tool-call/tool-call.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/copilot/components/tool-call/tool-call.tsx
@@ -705,6 +705,8 @@ function getSubagentLabels(toolName: string, isStreaming: boolean): string {
       return isStreaming ? 'Testing' : 'Tested'
     case 'deploy':
       return isStreaming ? 'Deploying' : 'Deployed'
+    case 'evaluate':
+      return isStreaming ? 'Evaluating' : 'Evaluated'
     case 'auth':
       return isStreaming ? 'Authenticating' : 'Authenticated'
     case 'research':
@@ -1487,6 +1489,7 @@ export function ToolCall({ toolCall: toolCallProp, toolCallId, onStateChange }:
     'debug',
     'test',
     'deploy',
+    'evaluate',
     'auth',
     'research',
     'knowledge',
diff --git a/apps/sim/lib/copilot/registry.ts b/apps/sim/lib/copilot/registry.ts
@@ -42,6 +42,7 @@ export const ToolIds = z.enum([
   'sleep',
   'get_block_outputs',
   'get_block_upstream_references',
+  'evaluate',
 ])
 export type ToolId = z.infer<typeof ToolIds>
 
@@ -361,6 +362,10 @@ export const ToolArgSchemas = {
         'Array of block UUIDs. Returns all upstream references (block outputs and variables) accessible to each block based on workflow connections.'
       ),
   }),
+
+  evaluate: z.object({
+    instruction: z.string().describe('Instructions for what to evaluate'),
+  }),
 } as const
 export type ToolArgSchemaMap = typeof ToolArgSchemas
 
@@ -445,6 +450,7 @@ export const ToolSSESchemas = {
     'get_block_upstream_references',
     ToolArgSchemas.get_block_upstream_references
   ),
+  evaluate: toolCallSSEFor('evaluate', ToolArgSchemas.evaluate),
 } as const
 export type ToolSSESchemaMap = typeof ToolSSESchemas
 
@@ -811,6 +817,10 @@ export const ToolResultSchemas = {
       })
     ),
   }),
+  evaluate: z.object({
+    success: z.boolean(),
+    message: z.string().optional(),
+  }),
 } as const
 export type ToolResultSchemaMap = typeof ToolResultSchemas
 
diff --git a/apps/sim/lib/copilot/tools/client/other/evaluate.ts b/apps/sim/lib/copilot/tools/client/other/evaluate.ts
@@ -0,0 +1,45 @@
+import { ClipboardCheck, Loader2, XCircle } from 'lucide-react'
+import {
+  BaseClientTool,
+  type BaseClientToolMetadata,
+  ClientToolCallState,
+} from '@/lib/copilot/tools/client/base-tool'
+
+interface EvaluateArgs {
+  instruction: string // instructions for what to evaluate
+}
+
+/**
+ * Client-side tool registered under the `evaluate` id; it spawns a subagent to evaluate
+ * workflows or outputs. The tool auto-executes and the actual work is done by the evaluate
+ * subagent, whose output is streamed as nested content under this tool call.
+ */
+export class EvaluateClientTool extends BaseClientTool {
+  static readonly id = 'evaluate'
+
+  constructor(toolCallId: string) {
+    super(toolCallId, EvaluateClientTool.id, EvaluateClientTool.metadata)
+  }
+
+  static readonly metadata: BaseClientToolMetadata = {
+    displayNames: {
+      [ClientToolCallState.generating]: { text: 'Evaluating', icon: Loader2 },
+      [ClientToolCallState.pending]: { text: 'Evaluating', icon: Loader2 },
+      [ClientToolCallState.executing]: { text: 'Evaluating', icon: Loader2 },
+      [ClientToolCallState.success]: { text: 'Evaluated', icon: ClipboardCheck },
+      [ClientToolCallState.error]: { text: 'Failed to evaluate', icon: XCircle },
+      [ClientToolCallState.rejected]: { text: 'Evaluation skipped', icon: XCircle },
+      [ClientToolCallState.aborted]: { text: 'Evaluation aborted', icon: XCircle },
+    },
+  }
+
+  /**
+   * Moves this tool call into the `executing` state and does nothing else; the evaluation
+   * itself is done server-side by the evaluate subagent and streamed as subagent events.
+   * @param _args - Tool arguments; accepted for signature compatibility but never read here.
+   */
+  async execute(_args?: EvaluateArgs): Promise<void> {
+    this.setState(ClientToolCallState.executing)
+  }
+}
+
diff --git a/apps/sim/stores/panel/copilot/store.ts b/apps/sim/stores/panel/copilot/store.ts
@@ -31,6 +31,7 @@ import { CustomToolClientTool } from '@/lib/copilot/tools/client/other/custom-to
 import { DebugClientTool } from '@/lib/copilot/tools/client/other/debug'
 import { DeployClientTool } from '@/lib/copilot/tools/client/other/deploy'
 import { EditClientTool } from '@/lib/copilot/tools/client/other/edit'
+import { EvaluateClientTool } from '@/lib/copilot/tools/client/other/evaluate'
 import { InfoClientTool } from '@/lib/copilot/tools/client/other/info'
 import { KnowledgeClientTool } from '@/lib/copilot/tools/client/other/knowledge'
 import { MakeApiRequestClientTool } from '@/lib/copilot/tools/client/other/make-api-request'
@@ -98,6 +99,7 @@ const CLIENT_TOOL_INSTANTIATORS: Record<string, (id: string) => any> = {
   debug: (id) => new DebugClientTool(id),
   test: (id) => new TestClientTool(id),
   deploy: (id) => new DeployClientTool(id),
+  evaluate: (id) => new EvaluateClientTool(id),
   auth: (id) => new AuthClientTool(id),
   research: (id) => new ResearchClientTool(id),
   knowledge: (id) => new KnowledgeClientTool(id),
@@ -155,6 +157,7 @@ export const CLASS_TOOL_METADATA: Record<string, BaseClientToolMetadata | undefi
   debug: (DebugClientTool as any)?.metadata,
   test: (TestClientTool as any)?.metadata,
   deploy: (DeployClientTool as any)?.metadata,
+  evaluate: (EvaluateClientTool as any)?.metadata,
   auth: (AuthClientTool as any)?.metadata,
   research: (ResearchClientTool as any)?.metadata,
   knowledge: (KnowledgeClientTool as any)?.metadata,