drivecore · bhouston · Mar 5, 2025 · Mar 5, 2025 · Mar 5, 2025
diff --git a/.changeset/convert-to-zod.md b/.changeset/convert-to-zod.md
diff --git a/.changeset/github-mode.md b/.changeset/github-mode.md
diff --git a/.changeset/implement-token-caching.md b/.changeset/implement-token-caching.md
diff --git a/.changeset/openai-models.md b/.changeset/openai-models.md
diff --git a/.changeset/performance-profiling.md b/.changeset/performance-profiling.md
diff --git a/.changeset/refactor-tool-agent.md b/.changeset/refactor-tool-agent.md
diff --git a/.changeset/text-editor.md b/.changeset/text-editor.md
diff --git a/.changeset/windows-compatibility.md b/.changeset/windows-compatibility.md
diff --git a/packages/agent/CHANGELOG.md b/packages/agent/CHANGELOG.md
@@ -1,5 +1,21 @@
 # mycoder-agent
 
+## 0.5.0
+
+### Minor Changes
+
+- a51b970: Convert from JsonSchema7Type to ZodSchema for tool parameters and returns, required for Vercel AI SDK integration.
+- 27f73e3: Add GitHub mode to MyCoder for working with issues and PRs
+- 66ff39e: Add support for OpenAI models (o3-mini and GPT-4o) via Vercel AI SDK
+- 9b9d953: Add performance profiler via --profile to diagnose slow start-up times on some OSes.
+- 27c2ba5: Refactored toolAgent.ts into modular components for improved maintainability and testability. Split into config.ts, messageUtils.ts, toolExecutor.ts, tokenTracking.ts, and types.ts modules.
+- a4331b8: Add textEditor tool that combines readFile and updateFile functionality
+- 9b9d953: Use cross-platform tooling to build up context, rather than Linux/macOS-specific tools.
+
+### Patch Changes
+
+- 870cbee: Re-implemented token caching for Vercel AI SDK usage with Anthropic provider to reduce token consumption during repeated API calls.
+
 ## 0.4.0
 
 ### Minor Changes

diff --git a/packages/agent/package.json b/packages/agent/package.json
@@ -1,6 +1,6 @@
 {
   "name": "mycoder-agent",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "description": "Agent module for mycoder - an AI-powered software development assistant",
   "type": "module",
   "main": "dist/index.js",

diff --git a/packages/agent/src/tools/browser/browseMessage.ts b/packages/agent/src/tools/browser/browseMessage.ts
@@ -6,41 +6,35 @@ import { errorToString } from '../../utils/errorToString.js';
 import { sleep } from '../../utils/sleep.js';
 
 import { filterPageContent } from './filterPageContent.js';
-import { browserSessions, type BrowserAction, SelectorType } from './types.js';
-
-// Schema for browser action
-const browserActionSchema = z
-  .object({
-    actionType: z.enum(['goto', 'click', 'type', 'wait', 'content', 'close']),
-    url: z
-      .string()
-      .url()
-      .optional()
-      .describe('URL to navigate to if "goto" actionType'),
-    selector: z
-      .string()
-      .optional()
-      .describe('Selector to click if "click" actionType'),
-    selectorType: z
-      .nativeEnum(SelectorType)
-      .optional()
-      .describe('Type of selector if "click" actionType'),
-    text: z
-      .string()
-      .optional()
-      .describe(
-        'Text to type if "type" actionType, for other actionType, this is ignored',
-      ),
-  })
-  .describe('Browser action to perform');
+import { browserSessions, SelectorType } from './types.js';
 
 // Main parameter schema
 const parameterSchema = z.object({
   instanceId: z.string().describe('The ID returned by browseStart'),
-  action: browserActionSchema,
+  actionType: z
+    .enum(['goto', 'click', 'type', 'wait', 'content', 'close'])
+    .describe('Browser action to perform'),
+  url: z
+    .string()
+    .url()
+    .optional()
+    .describe('URL to navigate to if "goto" actionType'),
+  selector: z
+    .string()
+    .optional()
+    .describe('Selector to click if "click" actionType'),
+  selectorType: z
+    .nativeEnum(SelectorType)
+    .optional()
+    .describe('Type of selector if "click" actionType'),
+  text: z
+    .string()
+    .optional()
+    .describe(
+      'Text to type if "type" actionType, for other actionType, this is ignored',
+    ),
   description: z
     .string()
-    .max(80)
     .describe('The reason for this browser action (max 80 chars)'),
 });
 
@@ -76,27 +70,20 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
   returnsJsonSchema: zodToJsonSchema(returnSchema),
 
   execute: async (
-    { instanceId, action },
+    { instanceId, actionType, url, selector, selectorType, text },
     { logger, pageFilter },
   ): Promise<ReturnType> => {
     // Validate action format
-    if (!action || typeof action !== 'object') {
-      logger.error('Invalid action format: action must be an object');
-      return {
-        status: 'error',
-        error: 'Invalid action format: action must be an object',
-      };
-    }
 
-    if (!action.actionType) {
+    if (!actionType) {
       logger.error('Invalid action format: actionType is required');
       return {
         status: 'error',
         error: 'Invalid action format: actionType is required',
       };
     }
 
-    logger.verbose(`Executing browser action: ${action.actionType}`);
+    logger.verbose(`Executing browser action: ${actionType}`);
     logger.verbose(`Webpage processing mode: ${pageFilter}`);
 
     try {
@@ -107,18 +94,18 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
 
       const { page } = session;
 
-      switch (action.actionType) {
+      switch (actionType) {
         case 'goto': {
-          if (!action.url) {
+          if (!url) {
             throw new Error('URL required for goto action');
           }
 
           try {
             // Try with 'domcontentloaded' first which is more reliable than 'networkidle'
             logger.verbose(
-              `Navigating to ${action.url} with 'domcontentloaded' waitUntil`,
+              `Navigating to ${url} with 'domcontentloaded' waitUntil`,
             );
-            await page.goto(action.url, { waitUntil: 'domcontentloaded' });
+            await page.goto(url, { waitUntil: 'domcontentloaded' });
             await sleep(3000);
             const content = await filterPageContent(page, pageFilter);
             logger.verbose(`Content: ${content}`);
@@ -133,11 +120,11 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
               `Failed with domcontentloaded strategy: ${errorToString(navError)}`,
             );
             logger.verbose(
-              `Retrying navigation to ${action.url} with no waitUntil option`,
+              `Retrying navigation to ${url} with no waitUntil option`,
             );
 
             try {
-              await page.goto(action.url);
+              await page.goto(url);
               await sleep(3000);
               const content = await filterPageContent(page, pageFilter);
               logger.verbose(`Content: ${content}`);
@@ -153,13 +140,10 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
         }
 
         case 'click': {
-          if (!action.selector) {
+          if (!selector) {
             throw new Error('Selector required for click action');
           }
-          const clickSelector = getSelector(
-            action.selector,
-            action.selectorType,
-          );
+          const clickSelector = getSelector(selector, selectorType);
           await page.click(clickSelector);
           await sleep(1000); // Wait for any content changes after click
           const content = await filterPageContent(page, pageFilter);
@@ -170,26 +154,20 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
         }
 
         case 'type': {
-          if (!action.selector || !action.text) {
+          if (!selector || !text) {
             throw new Error('Selector and text required for type action');
           }
-          const typeSelector = getSelector(
-            action.selector,
-            action.selectorType,
-          );
-          await page.fill(typeSelector, action.text);
+          const typeSelector = getSelector(selector, selectorType);
+          await page.fill(typeSelector, text);
           logger.verbose(`Type action completed on selector: ${typeSelector}`);
           return { status: 'success' };
         }
 
         case 'wait': {
-          if (!action.selector) {
+          if (!selector) {
             throw new Error('Selector required for wait action');
           }
-          const waitSelector = getSelector(
-            action.selector,
-            action.selectorType,
-          );
+          const waitSelector = getSelector(selector, selectorType);
           await page.waitForSelector(waitSelector);
           logger.verbose(`Wait action completed for selector: ${waitSelector}`);
           return { status: 'success' };
@@ -211,9 +189,7 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
         }
 
         default: {
-          throw new Error(
-            `Unsupported action type: ${(action as BrowserAction).actionType}`,
-          );
+          throw new Error(`Unsupported action type: ${actionType}`);
         }
       }
     } catch (error) {
@@ -226,11 +202,11 @@ export const browseMessageTool: Tool<Parameters, ReturnType> = {
   },
 
   logParameters: (
-    { action, description },
+    { actionType, description },
     { logger, pageFilter = 'simple' },
   ) => {
     logger.info(
-      `Performing browser action: ${action.actionType} with ${pageFilter} processing, ${description}`,
+      `Performing browser action: ${actionType} with ${pageFilter} processing, ${description}`,
     );
   },
 

diff --git a/packages/agent/src/tools/browser/browseStart.ts b/packages/agent/src/tools/browser/browseStart.ts
@@ -18,7 +18,6 @@ const parameterSchema = z.object({
     .describe('Default timeout in milliseconds (default: 30000)'),
   description: z
     .string()
-    .max(80)
     .describe('The reason for starting this browser session (max 80 chars)'),
 });
 

diff --git a/packages/agent/src/tools/interaction/subAgent.ts b/packages/agent/src/tools/interaction/subAgent.ts
@@ -12,7 +12,6 @@ import { getTools } from '../getTools.js';
 const parameterSchema = z.object({
   description: z
     .string()
-    .max(80)
     .describe("A brief description of the sub-agent's purpose (max 80 chars)"),
   goal: z
     .string()

diff --git a/packages/agent/src/tools/system/shellExecute.ts b/packages/agent/src/tools/system/shellExecute.ts
@@ -15,7 +15,6 @@ const parameterSchema = z.object({
     .describe('The shell command to execute in MacOS bash format'),
   description: z
     .string()
-    .max(80)
     .describe('The reason this shell command is being run (max 80 chars)'),
   timeout: z
     .number()

diff --git a/packages/agent/src/tools/system/shellMessage.ts b/packages/agent/src/tools/system/shellMessage.ts
@@ -53,7 +53,6 @@ const parameterSchema = z.object({
     .describe('Signal to send to the process (e.g., SIGTERM, SIGINT)'),
   description: z
     .string()
-    .max(80)
     .describe('The reason for this shell interaction (max 80 chars)'),
 });
 

diff --git a/packages/agent/src/tools/system/shellStart.ts b/packages/agent/src/tools/system/shellStart.ts
@@ -29,7 +29,6 @@ const parameterSchema = z.object({
   command: z.string().describe('The shell command to execute'),
   description: z
     .string()
-    .max(80)
     .describe('The reason this shell command is being run (max 80 chars)'),
   timeout: z
     .number()

diff --git a/packages/cli/CHANGELOG.md b/packages/cli/CHANGELOG.md
@@ -1,5 +1,15 @@
 # mycoder
 
+## 0.5.0
+
+### Minor Changes
+
+- a51b970: Convert from JsonSchema7Type to ZodSchema for tool parameters and returns, required for Vercel AI SDK integration.
+- 27f73e3: Add GitHub mode to MyCoder for working with issues and PRs
+- 66ff39e: Add support for OpenAI models (o3-mini and GPT-4o) via Vercel AI SDK
+- 9b9d953: Add performance profiler via --profile to diagnose slow start-up times on some OSes.
+- 9b9d953: Use cross-platform tooling to build up context, rather than Linux/macOS-specific tools.
+
 ## 0.4.0
 
 ### Minor Changes

diff --git a/packages/cli/package.json b/packages/cli/package.json
@@ -1,7 +1,7 @@
 {
   "name": "mycoder",
   "description": "A command line tool using agent that can do arbitrary tasks, including coding tasks",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "type": "module",
   "bin": "./bin/cli.js",
   "main": "./dist/index.js",