Commit 080c8fb

Merge pull request #176 from drivecore/feature/145-token-caching
Feature/145 token caching
2 parents daeaf19 + ddc04ab commit 080c8fb

File tree: 6 files changed, +114 -99 lines


packages/agent/package.json (1 addition, 1 deletion)

@@ -44,7 +44,7 @@
   "author": "Ben Houston",
   "license": "MIT",
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.16.0",
+    "@anthropic-ai/sdk": "^0.37",
     "@mozilla/readability": "^0.5.0",
     "@playwright/test": "^1.50.1",
     "@vitest/browser": "^3.0.5",

packages/agent/src/core/llm/provider.ts (0 additions, 8 deletions)

@@ -31,14 +31,6 @@ export interface LLMProvider {
    * @returns Response with text and/or tool calls
    */
   generateText(options: GenerateOptions): Promise<LLMResponse>;
-
-  /**
-   * Get the number of tokens in a given text
-   *
-   * @param text Text to count tokens for
-   * @returns Number of tokens
-   */
-  countTokens(text: string): Promise<number>;
 }

 // Provider factory registry
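
Dropping countTokens fits the theme of the PR: rather than a client-side estimate, token counts now come straight from the API response's own usage accounting (see tokenUsageFromMessage in anthropic.ts below).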

packages/agent/src/core/llm/providers/anthropic.ts (97 additions, 38 deletions)

@@ -3,6 +3,7 @@
  */
 import Anthropic from '@anthropic-ai/sdk';

+import { TokenUsage } from '../../tokens.js';
 import { LLMProvider } from '../provider.js';
 import {
   GenerateOptions,
@@ -19,6 +20,73 @@ export interface AnthropicOptions extends ProviderOptions {
   baseUrl?: string;
 }

+// a function that takes a list of messages and returns a list of messages but with the last message having a cache_control of ephemeral
+function addCacheControlToTools<T>(messages: T[]): T[] {
+  return messages.map((m, i) => ({
+    ...m,
+    ...(i === messages.length - 1
+      ? { cache_control: { type: 'ephemeral' } }
+      : {}),
+  }));
+}
+
+function addCacheControlToContentBlocks(
+  content: Anthropic.Messages.TextBlock[],
+): Anthropic.Messages.TextBlock[] {
+  return content.map((c, i) => {
+    if (i === content.length - 1) {
+      if (
+        c.type === 'text' ||
+        c.type === 'document' ||
+        c.type === 'image' ||
+        c.type === 'tool_use' ||
+        c.type === 'tool_result' ||
+        c.type === 'thinking' ||
+        c.type === 'redacted_thinking'
+      ) {
+        return { ...c, cache_control: { type: 'ephemeral' } };
+      }
+    }
+    return c;
+  });
+}
+function addCacheControlToMessages(
+  messages: Anthropic.Messages.MessageParam[],
+): Anthropic.Messages.MessageParam[] {
+  return messages.map((m, i) => {
+    if (typeof m.content === 'string') {
+      return {
+        ...m,
+        content: [
+          {
+            type: 'text',
+            text: m.content,
+            cache_control: { type: 'ephemeral' },
+          },
+        ],
+      };
+    }
+    return {
+      ...m,
+      content:
+        i >= messages.length - 2
+          ? addCacheControlToContentBlocks(
+              m.content as Anthropic.Messages.TextBlock[],
+            )
+          : m.content,
+    };
+  });
+}
+
+function tokenUsageFromMessage(message: Anthropic.Message) {
+  const usage = new TokenUsage();
+  usage.input = message.usage.input_tokens;
+  usage.cacheWrites = message.usage.cache_creation_input_tokens ?? 0;
+  usage.cacheReads = message.usage.cache_read_input_tokens ?? 0;
+  usage.output = message.usage.output_tokens;
+  return usage;
+}
+
 /**
  * Anthropic provider implementation
  */
@@ -50,57 +118,55 @@ export class AnthropicProvider implements LLMProvider {
    * Generate text using Anthropic API
    */
   async generateText(options: GenerateOptions): Promise<LLMResponse> {
-    const {
-      messages,
-      functions,
-      temperature = 0.7,
-      maxTokens,
-      stopSequences,
-      topP,
-    } = options;
+    const { messages, functions, temperature = 0.7, maxTokens, topP } = options;

     // Extract system message
     const systemMessage = messages.find((msg) => msg.role === 'system');
     const nonSystemMessages = messages.filter((msg) => msg.role !== 'system');
     const formattedMessages = this.formatMessages(nonSystemMessages);

+    const tools = addCacheControlToTools(
+      (functions ?? []).map((fn) => ({
+        name: fn.name,
+        description: fn.description,
+        input_schema: fn.parameters as Anthropic.Tool.InputSchema,
+      })),
+    );
+
     try {
       const requestOptions: Anthropic.MessageCreateParams = {
         model: this.model,
-        messages: formattedMessages,
+        messages: addCacheControlToMessages(formattedMessages),
         temperature,
         max_tokens: maxTokens || 1024,
-        ...(stopSequences && { stop_sequences: stopSequences }),
-        ...(topP && { top_p: topP }),
-        ...(systemMessage && { system: systemMessage.content }),
+        system: systemMessage?.content
+          ? [
+              {
+                type: 'text',
+                text: systemMessage?.content,
+                cache_control: { type: 'ephemeral' },
+              },
+            ]
+          : undefined,
+        top_p: topP,
+        tools,
+        stream: false,
       };

-      // Add tools if provided
-      if (functions && functions.length > 0) {
-        const tools = functions.map((fn) => ({
-          name: fn.name,
-          description: fn.description,
-          input_schema: fn.parameters,
-        }));
-        (requestOptions as any).tools = tools;
-      }
-
       const response = await this.client.messages.create(requestOptions);

       // Extract content and tool calls
       const content =
         response.content.find((c) => c.type === 'text')?.text || '';
       const toolCalls = response.content
         .filter((c) => {
-          const contentType = (c as any).type;
+          const contentType = c.type;
           return contentType === 'tool_use';
         })
         .map((c) => {
-          const toolUse = c as any;
+          const toolUse = c as Anthropic.Messages.ToolUseBlock;
           return {
-            id:
-              toolUse.id ||
-              `tool-${Math.random().toString(36).substring(2, 11)}`,
+            id: toolUse.id,
             name: toolUse.name,
             content: JSON.stringify(toolUse.input),
           };
@@ -109,6 +175,7 @@ export class AnthropicProvider implements LLMProvider {
       return {
         text: content,
         toolCalls: toolCalls,
+        tokenUsage: tokenUsageFromMessage(response),
       };
     } catch (error) {
       throw new Error(
@@ -117,20 +184,12 @@ export class AnthropicProvider implements LLMProvider {
     }
   }

-  /**
-   * Count tokens in a text using Anthropic's tokenizer
-   * Note: This is a simplified implementation
-   */
-  async countTokens(text: string): Promise<number> {
-    // In a real implementation, you would use Anthropic's tokenizer
-    // This is a simplified approximation
-    return Math.ceil(text.length / 3.5);
-  }
-
   /**
    * Format messages for Anthropic API
    */
-  private formatMessages(messages: Message[]): any[] {
+  private formatMessages(
+    messages: Message[],
+  ): Anthropic.Messages.MessageParam[] {
     // Format messages for Anthropic API
     return messages.map((msg) => {
       if (msg.role === 'user') {
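
Taken together, the helpers place up to four cache breakpoints per request, which is Anthropic's documented maximum: one on the last tool definition (caching the whole tool list before it), one on the system prompt, and one on each of the last two messages. A minimal sketch of the resulting request shape, assuming SDK ^0.37; the model id, tool names, and texts are illustrative, not taken from this commit:

import Anthropic from '@anthropic-ai/sdk';

// Illustrative payload only; the values are made up, the shape mirrors the diff above.
const request: Anthropic.MessageCreateParams = {
  model: 'claude-3-7-sonnet-latest', // hypothetical model id
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are a coding agent...', // system prompt cached as a single block
      cache_control: { type: 'ephemeral' },
    },
  ],
  tools: [
    { name: 'shellExecute', description: 'Run a shell command', input_schema: { type: 'object' } },
    {
      // last tool carries the breakpoint, so the entire tool list is cached
      name: 'readFile',
      description: 'Read a file',
      input_schema: { type: 'object' },
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Fix the failing test', cache_control: { type: 'ephemeral' } },
      ],
    },
  ],
  stream: false,
};

On the first call the marked prefix is written to the cache (reported as cache_creation_input_tokens); on later iterations of the agent loop the unchanged prefix comes back as cache_read_input_tokens at a much lower per-token price, which is what the token caching this PR is named for is after.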

packages/agent/src/core/llm/types.ts (5 additions, 1 deletion)

@@ -2,6 +2,9 @@
  * Core message types for LLM interactions
  */

+import { JsonSchema7Type } from 'zod-to-json-schema';
+
+import { TokenUsage } from '../tokens';
 import { ToolCall } from '../types';

 /**
@@ -67,7 +70,7 @@ export type Message =
 export interface FunctionDefinition {
   name: string;
   description: string;
-  parameters: Record<string, any>; // JSON Schema object
+  parameters: JsonSchema7Type; // JSON Schema object
 }

 /**
@@ -76,6 +79,7 @@ export interface FunctionDefinition {
 export interface LLMResponse {
   text: string;
   toolCalls: ToolCall[];
+  tokenUsage: TokenUsage;
 }

 /**
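
tokens.ts itself is absent from this diff, so the exact shape of TokenUsage is an inference. From tokenUsageFromMessage above and the tracker call below, a plausible sketch (field and method names inferred from usage in this PR, not confirmed by the commit):

// Hypothetical reconstruction of the TokenUsage class this diff relies on.
export class TokenUsage {
  input = 0; // uncached input tokens, billed at the normal rate
  cacheWrites = 0; // input tokens written to the prompt cache
  cacheReads = 0; // input tokens served back from the prompt cache
  output = 0; // generated tokens

  // Accumulate another record, as tokenTracker.tokenUsage.add(tokenUsage) does below.
  add(other: TokenUsage): void {
    this.input += other.input;
    this.cacheWrites += other.cacheWrites;
    this.cacheReads += other.cacheReads;
    this.output += other.output;
  }
}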

packages/agent/src/core/toolAgent/toolAgentCore.ts (6 additions, 1 deletion)

@@ -76,7 +76,12 @@ export const toolAgent = async (
       maxTokens: config.maxTokens,
     };

-    const { text, toolCalls } = await generateText(provider, generateOptions);
+    const { text, toolCalls, tokenUsage } = await generateText(
+      provider,
+      generateOptions,
+    );
+
+    tokenTracker.tokenUsage.add(tokenUsage);

     if (!text.length && toolCalls.length === 0) {
       // Only consider it empty if there's no text AND no tool calls
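
With usage flowing back per call, each iteration of the agent loop adds its own numbers into the shared tracker. An illustrative view of what caching does to that tally over two iterations (numbers are made up; assumes the TokenUsage sketch above):

import { TokenUsage } from '../tokens.js'; // path assumed, matching the imports above

// Illustrative only: how the tally evolves once the prompt prefix is cached.
const tracker = new TokenUsage();

// Iteration 1: nothing is cached yet, so the marked prefix is a cache write.
const first = new TokenUsage();
first.cacheWrites = 4000; // system prompt + tool list + conversation so far
first.output = 150;
tracker.add(first);

// Iteration 2: the unchanged prefix is read back from cache;
// only the new turn is billed as fresh input.
const second = new TokenUsage();
second.cacheReads = 4000;
second.input = 300;
second.output = 120;
tracker.add(second);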
