Skip to content

Commit ddc04ab

Browse files
committed
feat: add back token tracking, system prompt caching.
1 parent d78723b commit ddc04ab

File tree

6 files changed

+52
-88
lines changed

6 files changed

+52
-88
lines changed

packages/agent/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"author": "Ben Houston",
4545
"license": "MIT",
4646
"dependencies": {
47-
"@anthropic-ai/sdk": "^0.16.0",
47+
"@anthropic-ai/sdk": "^0.37",
4848
"@mozilla/readability": "^0.5.0",
4949
"@playwright/test": "^1.50.1",
5050
"@vitest/browser": "^3.0.5",

packages/agent/src/core/llm/provider.ts

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ export interface LLMProvider {
3131
* @returns Response with text and/or tool calls
3232
*/
3333
generateText(options: GenerateOptions): Promise<LLMResponse>;
34-
35-
/**
36-
* Get the number of tokens in a given text
37-
*
38-
* @param text Text to count tokens for
39-
* @returns Number of tokens
40-
*/
41-
countTokens(text: string): Promise<number>;
4234
}
4335

4436
// Provider factory registry

packages/agent/src/core/llm/providers/anthropic.ts

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44
import Anthropic from '@anthropic-ai/sdk';
55

6+
import { TokenUsage } from '../../tokens.js';
67
import { LLMProvider } from '../provider.js';
78
import {
89
GenerateOptions,
@@ -77,6 +78,15 @@ function addCacheControlToMessages(
7778
});
7879
}
7980

81+
function tokenUsageFromMessage(message: Anthropic.Message) {
82+
const usage = new TokenUsage();
83+
usage.input = message.usage.input_tokens;
84+
usage.cacheWrites = message.usage.cache_creation_input_tokens ?? 0;
85+
usage.cacheReads = message.usage.cache_read_input_tokens ?? 0;
86+
usage.output = message.usage.output_tokens;
87+
return usage;
88+
}
89+
8090
/**
8191
* Anthropic provider implementation
8292
*/
@@ -115,43 +125,48 @@ export class AnthropicProvider implements LLMProvider {
115125
const nonSystemMessages = messages.filter((msg) => msg.role !== 'system');
116126
const formattedMessages = this.formatMessages(nonSystemMessages);
117127

128+
const tools = addCacheControlToTools(
129+
(functions ?? []).map((fn) => ({
130+
name: fn.name,
131+
description: fn.description,
132+
input_schema: fn.parameters as Anthropic.Tool.InputSchema,
133+
})),
134+
);
135+
118136
try {
119137
const requestOptions: Anthropic.MessageCreateParams = {
120138
model: this.model,
121139
messages: addCacheControlToMessages(formattedMessages),
122140
temperature,
123141
max_tokens: maxTokens || 1024,
124-
system: systemMessage?.content,
142+
system: systemMessage?.content
143+
? [
144+
{
145+
type: 'text',
146+
text: systemMessage?.content,
147+
cache_control: { type: 'ephemeral' },
148+
},
149+
]
150+
: undefined,
125151
top_p: topP,
152+
tools,
126153
stream: false,
127154
};
128155

129-
// Add tools if provided
130-
if (functions && functions.length > 0) {
131-
const tools = functions.map((fn) => ({
132-
name: fn.name,
133-
description: fn.description,
134-
input_schema: fn.parameters,
135-
}));
136-
(requestOptions as any).tools = addCacheControlToTools(tools);
137-
}
138-
139156
const response = await this.client.messages.create(requestOptions);
140157

141158
// Extract content and tool calls
142159
const content =
143160
response.content.find((c) => c.type === 'text')?.text || '';
144161
const toolCalls = response.content
145162
.filter((c) => {
146-
const contentType = (c as any).type;
163+
const contentType = c.type;
147164
return contentType === 'tool_use';
148165
})
149166
.map((c) => {
150-
const toolUse = c as any;
167+
const toolUse = c as Anthropic.Messages.ToolUseBlock;
151168
return {
152-
id:
153-
toolUse.id ||
154-
`tool-${Math.random().toString(36).substring(2, 11)}`,
169+
id: toolUse.id,
155170
name: toolUse.name,
156171
content: JSON.stringify(toolUse.input),
157172
};
@@ -160,6 +175,7 @@ export class AnthropicProvider implements LLMProvider {
160175
return {
161176
text: content,
162177
toolCalls: toolCalls,
178+
tokenUsage: tokenUsageFromMessage(response),
163179
};
164180
} catch (error) {
165181
throw new Error(
@@ -168,20 +184,12 @@ export class AnthropicProvider implements LLMProvider {
168184
}
169185
}
170186

171-
/**
172-
* Count tokens in a text using Anthropic's tokenizer
173-
* Note: This is a simplified implementation
174-
*/
175-
async countTokens(text: string): Promise<number> {
176-
// In a real implementation, you would use Anthropic's tokenizer
177-
// This is a simplified approximation
178-
return Math.ceil(text.length / 3.5);
179-
}
180-
181187
/**
182188
* Format messages for Anthropic API
183189
*/
184-
private formatMessages(messages: Message[]): any[] {
190+
private formatMessages(
191+
messages: Message[],
192+
): Anthropic.Messages.MessageParam[] {
185193
// Format messages for Anthropic API
186194
return messages.map((msg) => {
187195
if (msg.role === 'user') {

packages/agent/src/core/llm/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
* Core message types for LLM interactions
33
*/
44

5+
import { JsonSchema7Type } from 'zod-to-json-schema';
6+
7+
import { TokenUsage } from '../tokens';
58
import { ToolCall } from '../types';
69

710
/**
@@ -67,7 +70,7 @@ export type Message =
6770
export interface FunctionDefinition {
6871
name: string;
6972
description: string;
70-
parameters: Record<string, any>; // JSON Schema object
73+
parameters: JsonSchema7Type; // JSON Schema object
7174
}
7275

7376
/**
@@ -76,6 +79,7 @@ export interface FunctionDefinition {
7679
export interface LLMResponse {
7780
text: string;
7881
toolCalls: ToolCall[];
82+
tokenUsage: TokenUsage;
7983
}
8084

8185
/**

packages/agent/src/core/toolAgent/toolAgentCore.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,12 @@ export const toolAgent = async (
7676
maxTokens: config.maxTokens,
7777
};
7878

79-
const { text, toolCalls } = await generateText(provider, generateOptions);
79+
const { text, toolCalls, tokenUsage } = await generateText(
80+
provider,
81+
generateOptions,
82+
);
83+
84+
tokenTracker.tokenUsage.add(tokenUsage);
8085

8186
if (!text.length && toolCalls.length === 0) {
8287
// Only consider it empty if there's no text AND no tool calls

pnpm-lock.yaml

Lines changed: 5 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)