   ToolResultPart,
   ToolSet,
   tool as makeTool,
-  Message
 } from 'ai';
 import chalk from 'chalk';
 
@@ -192,40 +191,47 @@ async function executeTools( |
   };
 }
 
+function createCacheControlMessageFromSystemPrompt(
+  systemPrompt: string,
+): CoreMessage {
+  return {
+    role: 'system',
+    content: systemPrompt,
+    providerOptions: {
+      anthropic: { cacheControl: { type: 'ephemeral' } },
+    },
+  };
+}
+
 /**
  * Adds cache control to the messages for token caching with the Vercel AI SDK
  * This marks the last two messages as ephemeral which allows the conversation up to that
  * point to be cached (with a ~5 minute window), reducing token usage when making multiple API calls
  */
 function addCacheControlToMessages(messages: CoreMessage[]): CoreMessage[] {
   if (messages.length <= 1) return messages;
-
+
   // Create a deep copy of the messages array to avoid mutating the original
   const result = JSON.parse(JSON.stringify(messages)) as CoreMessage[];
-
+
   // Get the last two messages (if available)
-  const lastTwoMessageIndices = [
-    messages.length - 1,
-    messages.length - 2
-  ];
-
+  const lastTwoMessageIndices = [messages.length - 1, messages.length - 2];
+
   // Add providerOptions with anthropic cache control to the last two messages
-  lastTwoMessageIndices.forEach(index => {
+  lastTwoMessageIndices.forEach((index) => {
     if (index >= 0) {
       const message = result[index];
       if (message) {
         // For the Vercel AI SDK, we need to add the providerOptions.anthropic property
         // with cacheControl: 'ephemeral' to enable token caching
         message.providerOptions = {
           ...message.providerOptions,
-          anthropic: {
-            cacheControl: 'ephemeral'
-          }
+          anthropic: { cacheControl: { type: 'ephemeral' } },
         };
       }
     }
   });
-
+
   return result;
 }
 
@@ -275,13 +281,15 @@ export const toolAgent = async ( |
     });
   });
   // Apply cache control to messages for token caching
-  const messagesWithCacheControl = addCacheControlToMessages(messages);
-
+  const messagesWithCacheControl = [
+    createCacheControlMessageFromSystemPrompt(systemPrompt),
+    ...addCacheControlToMessages(messages),
+  ];
+
   const generateTextProps = {
     model: config.model,
     temperature: config.temperature,
     messages: messagesWithCacheControl,
-    system: systemPrompt,
     tools: toolSet,
   };
   const { text, toolCalls } = await generateText(generateTextProps);
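
For context, here is a minimal sketch (not part of the commit) of how the two new helpers are expected to compose when calling generateText through the Vercel AI SDK. The model id, system prompt, and sample messages are illustrative assumptions, and both helpers are assumed to be in scope in the same module:

import { generateText, type CoreMessage } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';

// Illustrative inputs (assumptions, not values from the commit)
const systemPrompt = 'You are a coding agent.';
const history: CoreMessage[] = [
  { role: 'user', content: 'List the files in the repo.' },
  { role: 'assistant', content: 'Running the tool now.' },
];

// The system prompt becomes a cache-controlled system message, and the last
// two conversation messages are marked ephemeral, so the shared prefix can be
// reused across calls within Anthropic's ~5 minute cache window.
const messages: CoreMessage[] = [
  createCacheControlMessageFromSystemPrompt(systemPrompt),
  ...addCacheControlToMessages(history),
];

const { text } = await generateText({
  model: anthropic('claude-3-5-sonnet-latest'), // assumed model id
  messages,
});
console.log(text);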