Skip to content

Commit a51b970

Browse files
committed
Convert from JsonSchema7Type to ZodSchema for tool parameters and returns (issue #56)
1 parent 6746351 commit a51b970

27 files changed

+953
-63
lines changed

.changeset/convert-to-zod.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"mycoder-agent": minor
3+
"mycoder": minor
4+
---
5+
6+
Convert from JsonSchema7Type to ZodSchema for tool parameters and returns, required for Vercel AI SDK integration.

packages/agent/src/core/toolAgent.test.ts

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { anthropic } from '@ai-sdk/anthropic';
22
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
3+
import { z } from 'zod';
34

45
import { MockLogger } from '../utils/mockLogger.js';
56

@@ -65,7 +66,11 @@ describe('toolAgent', () => {
6566
const mockTool: Tool = {
6667
name: 'mockTool',
6768
description: 'A mock tool for testing',
68-
parameters: {
69+
parameters: z.object({
70+
input: z.string().describe('Test input'),
71+
}),
72+
returns: z.string().describe('The processed result'),
73+
parametersJsonSchema: {
6974
type: 'object',
7075
properties: {
7176
input: {
@@ -75,7 +80,7 @@ describe('toolAgent', () => {
7580
},
7681
required: ['input'],
7782
},
78-
returns: {
83+
returnsJsonSchema: {
7984
type: 'string',
8085
description: 'The processed result',
8186
},
@@ -85,7 +90,11 @@ describe('toolAgent', () => {
8590
const sequenceCompleteTool: Tool = {
8691
name: 'sequenceComplete',
8792
description: 'Completes the sequence',
88-
parameters: {
93+
parameters: z.object({
94+
result: z.string().describe('The final result'),
95+
}),
96+
returns: z.string().describe('The final result'),
97+
parametersJsonSchema: {
8998
type: 'object',
9099
properties: {
91100
result: {
@@ -95,7 +104,7 @@ describe('toolAgent', () => {
95104
},
96105
required: ['result'],
97106
},
98-
returns: {
107+
returnsJsonSchema: {
99108
type: 'string',
100109
description: 'The final result',
101110
},
@@ -134,12 +143,14 @@ describe('toolAgent', () => {
134143
const errorTool: Tool = {
135144
name: 'errorTool',
136145
description: 'A tool that always fails',
137-
parameters: {
146+
parameters: z.object({}),
147+
returns: z.string().describe('Error message'),
148+
parametersJsonSchema: {
138149
type: 'object',
139150
properties: {},
140151
required: [],
141152
},
142-
returns: {
153+
returnsJsonSchema: {
143154
type: 'string',
144155
description: 'Error message',
145156
},

packages/agent/src/core/toolAgent.ts

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
generateText,
88
ToolResultPart,
99
ToolSet,
10+
tool as makeTool,
1011
} from 'ai';
1112
import chalk from 'chalk';
1213

@@ -287,20 +288,22 @@ export const toolAgent = async (
287288

288289
const toolSet: ToolSet = {};
289290
tools.forEach((tool) => {
290-
toolSet[tool.name] = {
291+
toolSet[tool.name] = makeTool({
291292
description: tool.description,
292293
parameters: tool.parameters,
293-
};
294+
});
294295
});
296+
console.log('toolSet', toolSet);
297+
const generateTextProps = {
298+
model: config.model,
299+
temperature: config.temperature,
300+
messages,
301+
system: systemPrompt,
302+
tools: toolSet,
303+
};
304+
console.log('generateTextProps', generateTextProps);
295305
const { text, reasoning, reasoningDetails, toolCalls, toolResults } =
296-
await generateText({
297-
model: config.model,
298-
temperature: config.temperature,
299-
messages,
300-
system: systemPrompt,
301-
tools: toolSet,
302-
toolChoice: 'auto',
303-
});
306+
await generateText(generateTextProps);
304307

305308
const localToolCalls: ToolUseContent[] = toolCalls.map((call) => ({
306309
type: 'tool_use',

packages/agent/src/core/types.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { z } from 'zod';
12
import { JsonSchema7Type } from 'zod-to-json-schema';
23

34
import { Logger } from '../utils/logger.js';
@@ -20,14 +21,18 @@ export type ToolContext = {
2021
export type Tool<TParams = Record<string, any>, TReturn = any> = {
2122
name: string;
2223
description: string;
23-
parameters: JsonSchema7Type;
24-
returns: JsonSchema7Type;
24+
parameters: z.ZodType<TParams>;
25+
returns: z.ZodType<TReturn>;
2526
logPrefix?: string;
2627

2728
logParameters?: (params: TParams, context: ToolContext) => void;
2829
logReturns?: (returns: TReturn, context: ToolContext) => void;
2930

3031
execute: (params: TParams, context: ToolContext) => Promise<TReturn>;
32+
33+
// Keep JsonSchema7Type for backward compatibility and Vercel AI SDK integration
34+
parametersJsonSchema?: JsonSchema7Type;
35+
returnsJsonSchema?: JsonSchema7Type;
3136
};
3237

3338
export type ToolCall = {

packages/agent/src/tools/browser/browseMessage.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,10 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
7070
name: 'browseMessage',
7171
logPrefix: '🏄',
7272
description: 'Performs actions in an active browser session',
73-
parameters: zodToJsonSchema(parameterSchema),
74-
returns: zodToJsonSchema(returnSchema),
73+
parameters: parameterSchema,
74+
parametersJsonSchema: zodToJsonSchema(parameterSchema),
75+
returns: returnSchema,
76+
returnsJsonSchema: zodToJsonSchema(returnSchema),
7577

7678
execute: async (
7779
{ instanceId, action },
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
import { z } from 'zod';
2+
import { zodToJsonSchema } from 'zod-to-json-schema';
3+
4+
import { Tool } from '../../core/types.js';
5+
import { errorToString } from '../../utils/errorToString.js';
6+
import { sleep } from '../../utils/sleep.js';
7+
8+
import { filterPageContent } from './filterPageContent.js';
9+
import { browserSessions, type BrowserAction, SelectorType } from './types.js';
10+
// Field schemas for a single browser action. Only `actionType` is mandatory;
// the remaining fields are optional and their descriptions say which
// actionType they are meant for.
const actionTypeField = z.enum([
  'goto',
  'click',
  'type',
  'wait',
  'content',
  'close',
]);

const urlField = z
  .string()
  .url()
  .optional()
  .describe('URL to navigate to if "goto" actionType');

const selectorField = z
  .string()
  .optional()
  .describe('Selector to click if "click" actionType');

const selectorTypeField = z
  .nativeEnum(SelectorType)
  .optional()
  .describe('Type of selector if "click" actionType');

const textField = z
  .string()
  .optional()
  .describe(
    'Text to type if "type" actionType, for other actionType, this is ignored',
  );

// Schema for browser action
const browserActionSchema = z
  .object({
    actionType: actionTypeField,
    url: urlField,
    selector: selectorField,
    selectorType: selectorTypeField,
    text: textField,
  })
  .describe('Browser action to perform');

// Main parameter schema: which session to act on, what to do, and a short
// human-readable reason for doing it.
const parameterSchema = z.object({
  instanceId: z.string().describe('The ID returned by browseStart'),
  action: browserActionSchema,
  description: z
    .string()
    .max(80)
    .describe('The reason for this browser action (max 80 chars)'),
});

// Return schema: `status` is always present, `content` and `error` are optional.
const returnSchema = z.object({
  status: z.string(),
  content: z.string().optional(),
  error: z.string().optional(),
});

// Static types derived from the schemas above so the two cannot drift apart.
type Parameters = z.infer<typeof parameterSchema>;
type ReturnType = z.infer<typeof returnSchema>;
56+
/**
 * Builds the selector string for the given selector type.
 *
 * XPath and text selectors get an `xpath=` / `text=` prefix. Any other type,
 * or no type at all, returns the selector unchanged (treated as CSS).
 *
 * @param selector - Raw selector text.
 * @param type - Optional selector kind.
 * @returns The selector, prefixed when the kind requires it.
 */
const getSelector = (selector: string, type?: SelectorType): string => {
  if (type === SelectorType.XPATH) {
    return `xpath=${selector}`;
  }
  if (type === SelectorType.TEXT) {
    return `text=${selector}`;
  }
  // CSS selector is default
  return selector;
};
68+
/**
 * Tool that performs one action (`goto`, `click`, `type`, `wait`, `content`
 * or `close`) on the browser session stored under `instanceId` in
 * `browserSessions`.
 *
 * Errors raised while handling the action are not propagated: they are logged
 * and reported to the caller as `{ status: 'error', error }`.
 */
export const browseMessageTool: Tool<Parameters, ReturnType> = {
  name: 'browseMessage',
  logPrefix: '🏄',
  description: 'Performs actions in an active browser session',
  // `parameters` / `returns` carry the Zod schemas themselves; their JSON
  // Schema renderings live in the separate `*JsonSchema` fields.
  parameters: parameterSchema,
  parametersJsonSchema: zodToJsonSchema(parameterSchema),
  returns: returnSchema,
  returnsJsonSchema: zodToJsonSchema(returnSchema),

  /**
   * Runs the requested action and reports the outcome.
   *
   * @returns `status: 'success'` (with filtered page `content` for `goto`,
   * `click` and `content`), `status: 'closed'` after `close`, or
   * `status: 'error'` with an `error` message on any failure.
   */
  execute: async (
    { instanceId, action },
    { logger, pageFilter },
  ): Promise<ReturnType> => {
    // Validate action format
    if (!action || typeof action !== 'object') {
      logger.error('Invalid action format: action must be an object');
      return {
        status: 'error',
        error: 'Invalid action format: action must be an object',
      };
    }

    if (!action.actionType) {
      logger.error('Invalid action format: actionType is required');
      return {
        status: 'error',
        error: 'Invalid action format: actionType is required',
      };
    }

    logger.verbose(`Executing browser action: ${action.actionType}`);
    logger.verbose(`Webpage processing mode: ${pageFilter}`);

    try {
      const session = browserSessions.get(instanceId);
      if (!session) {
        throw new Error(`No browser session found with ID ${instanceId}`);
      }

      const { page } = session;

      switch (action.actionType) {
        case 'goto': {
          if (!action.url) {
            throw new Error('URL required for goto action');
          }

          try {
            // Try with 'domcontentloaded' first which is more reliable than 'networkidle'
            logger.verbose(
              `Navigating to ${action.url} with 'domcontentloaded' waitUntil`,
            );
            await page.goto(action.url, { waitUntil: 'domcontentloaded' });
            // Fixed pause before reading the page (presumably to let it settle).
            await sleep(3000);
            const content = await filterPageContent(page, pageFilter);
            logger.verbose(`Content: ${content}`);
            logger.verbose(
              'Navigation completed with domcontentloaded strategy',
            );
            logger.verbose(`Content length: ${content.length} characters`);
            return { status: 'success', content };
          } catch (navError) {
            // If that fails, try with no waitUntil option
            logger.warn(
              `Failed with domcontentloaded strategy: ${errorToString(navError)}`,
            );
            logger.verbose(
              `Retrying navigation to ${action.url} with no waitUntil option`,
            );

            try {
              await page.goto(action.url);
              await sleep(3000);
              const content = await filterPageContent(page, pageFilter);
              logger.verbose(`Content: ${content}`);
              logger.verbose('Navigation completed with basic strategy');
              return { status: 'success', content };
            } catch (innerError) {
              logger.error(
                `Failed with basic navigation strategy: ${errorToString(innerError)}`,
              );
              throw innerError; // Re-throw to be caught by outer catch block
            }
          }
        }

        case 'click': {
          if (!action.selector) {
            throw new Error('Selector required for click action');
          }
          const clickSelector = getSelector(
            action.selector,
            action.selectorType,
          );
          await page.click(clickSelector);
          await sleep(1000); // Wait for any content changes after click
          const content = await filterPageContent(page, pageFilter);
          logger.verbose(
            `Click action completed on selector: ${clickSelector}`,
          );
          return { status: 'success', content };
        }

        case 'type': {
          if (!action.selector || !action.text) {
            throw new Error('Selector and text required for type action');
          }
          const typeSelector = getSelector(
            action.selector,
            action.selectorType,
          );
          await page.fill(typeSelector, action.text);
          logger.verbose(`Type action completed on selector: ${typeSelector}`);
          return { status: 'success' };
        }

        case 'wait': {
          if (!action.selector) {
            throw new Error('Selector required for wait action');
          }
          const waitSelector = getSelector(
            action.selector,
            action.selectorType,
          );
          await page.waitForSelector(waitSelector);
          logger.verbose(`Wait action completed for selector: ${waitSelector}`);
          return { status: 'success' };
        }

        case 'content': {
          const content = await filterPageContent(page, pageFilter);
          logger.verbose('Page content retrieved successfully');
          logger.verbose(`Content length: ${content.length} characters`);
          return { status: 'success', content };
        }

        case 'close': {
          // Close the page's context, then the browser, then forget the session.
          await session.page.context().close();
          await session.browser.close();
          browserSessions.delete(instanceId);
          logger.verbose('Browser session closed successfully');
          return { status: 'closed' };
        }

        default: {
          throw new Error(
            `Unsupported action type: ${(action as BrowserAction).actionType}`,
          );
        }
      }
    } catch (error) {
      // Every failure above ends up here and is turned into an error result.
      logger.error('Browser action failed:', { error });
      return {
        status: 'error',
        error: errorToString(error),
      };
    }
  },

  /** Logs the action type, page filter mode and the caller-supplied reason. */
  logParameters: (
    { action, description },
    { logger, pageFilter = 'simple' },
  ) => {
    logger.info(
      `Performing browser action: ${action.actionType} with ${pageFilter} processing, ${description}`,
    );
  },

  /** Logs the error when the result carries one, otherwise the final status. */
  logReturns: (output, { logger }) => {
    if (output.error) {
      logger.error(`Browser action failed: ${output.error}`);
    } else {
      logger.info(`Browser action completed with status: ${output.status}`);
    }
  },
};

0 commit comments

Comments
 (0)