Commit 080c8fb

Merge pull request #176 from drivecore/feature/145-token-caching
Feature/145 token caching
2 parents daeaf19 + ddc04ab commit 080c8fb

File tree: 6 files changed, +114 -99 lines


packages/agent/package.json (1 addition, 1 deletion)

@@ -44,7 +44,7 @@
   "author": "Ben Houston",
   "license": "MIT",
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.16.0",
+    "@anthropic-ai/sdk": "^0.37",
     "@mozilla/readability": "^0.5.0",
     "@playwright/test": "^1.50.1",
     "@vitest/browser": "^3.0.5",

packages/agent/src/core/llm/provider.ts (0 additions, 8 deletions)

@@ -31,14 +31,6 @@ export interface LLMProvider {
    * @returns Response with text and/or tool calls
    */
   generateText(options: GenerateOptions): Promise<LLMResponse>;
-
-  /**
-   * Get the number of tokens in a given text
-   *
-   * @param text Text to count tokens for
-   * @returns Number of tokens
-   */
-  countTokens(text: string): Promise<number>;
 }

 // Provider factory registry
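
Dropping countTokens fits the theme of the PR: rather than a client-side estimate, token counts now come straight from the API response's own usage accounting (see tokenUsageFromMessage in anthropic.ts below).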

packages/agent/src/core/llm/providers/anthropic.ts (97 additions, 38 deletions)

@@ -3,6 +3,7 @@
  */
 import Anthropic from '@anthropic-ai/sdk';

+import { TokenUsage } from '../../tokens.js';
 import { LLMProvider } from '../provider.js';
 import {
   GenerateOptions,
@@ -19,6 +20,73 @@ export interface AnthropicOptions extends ProviderOptions {
   baseUrl?: string;
 }

+// a function that takes a list of messages and returns a list of messages but with the last message having a cache_control of ephemeral
+function addCacheControlToTools<T>(messages: T[]): T[] {
+  return messages.map((m, i) => ({
+    ...m,
+    ...(i === messages.length - 1
+      ? { cache_control: { type: 'ephemeral' } }
+      : {}),
+  }));
+}
+
+function addCacheControlToContentBlocks(
+  content: Anthropic.Messages.TextBlock[],
+): Anthropic.Messages.TextBlock[] {
+  return content.map((c, i) => {
+    if (i === content.length - 1) {
+      if (
+        c.type === 'text' ||
+        c.type === 'document' ||
+        c.type === 'image' ||
+        c.type === 'tool_use' ||
+        c.type === 'tool_result' ||
+        c.type === 'thinking' ||
+        c.type === 'redacted_thinking'
+      ) {
+        return { ...c, cache_control: { type: 'ephemeral' } };
+      }
+    }
+    return c;
+  });
+}
+function addCacheControlToMessages(
+  messages: Anthropic.Messages.MessageParam[],
+): Anthropic.Messages.MessageParam[] {
+  return messages.map((m, i) => {
+    if (typeof m.content === 'string') {
+      return {
+        ...m,
+        content: [
+          {
+            type: 'text',
+            text: m.content,
+            cache_control: { type: 'ephemeral' },
+          },
+        ],
+      };
+    }
+    return {
+      ...m,
+      content:
+        i >= messages.length - 2
+          ? addCacheControlToContentBlocks(
+              m.content as Anthropic.Messages.TextBlock[],
+            )
+          : m.content,
+    };
+  });
+}
+
+function tokenUsageFromMessage(message: Anthropic.Message) {
+  const usage = new TokenUsage();
+  usage.input = message.usage.input_tokens;
+  usage.cacheWrites = message.usage.cache_creation_input_tokens ?? 0;
+  usage.cacheReads = message.usage.cache_read_input_tokens ?? 0;
+  usage.output = message.usage.output_tokens;
+  return usage;
+}
+
 /**
  * Anthropic provider implementation
  */
@@ -50,57 +118,55 @@ export class AnthropicProvider implements LLMProvider {
    * Generate text using Anthropic API
    */
   async generateText(options: GenerateOptions): Promise<LLMResponse> {
-    const {
-      messages,
-      functions,
-      temperature = 0.7,
-      maxTokens,
-      stopSequences,
-      topP,
-    } = options;
+    const { messages, functions, temperature = 0.7, maxTokens, topP } = options;

     // Extract system message
     const systemMessage = messages.find((msg) => msg.role === 'system');
     const nonSystemMessages = messages.filter((msg) => msg.role !== 'system');
     const formattedMessages = this.formatMessages(nonSystemMessages);

+    const tools = addCacheControlToTools(
+      (functions ?? []).map((fn) => ({
+        name: fn.name,
+        description: fn.description,
+        input_schema: fn.parameters as Anthropic.Tool.InputSchema,
+      })),
+    );
+
     try {
       const requestOptions: Anthropic.MessageCreateParams = {
         model: this.model,
-        messages: formattedMessages,
+        messages: addCacheControlToMessages(formattedMessages),
         temperature,
         max_tokens: maxTokens || 1024,
-        ...(stopSequences && { stop_sequences: stopSequences }),
-        ...(topP && { top_p: topP }),
-        ...(systemMessage && { system: systemMessage.content }),
+        system: systemMessage?.content
+          ? [
+              {
+                type: 'text',
+                text: systemMessage?.content,
+                cache_control: { type: 'ephemeral' },
+              },
+            ]
+          : undefined,
+        top_p: topP,
+        tools,
+        stream: false,
       };

-      // Add tools if provided
-      if (functions && functions.length > 0) {
-        const tools = functions.map((fn) => ({
-          name: fn.name,
-          description: fn.description,
-          input_schema: fn.parameters,
-        }));
-        (requestOptions as any).tools = tools;
-      }
-
       const response = await this.client.messages.create(requestOptions);

       // Extract content and tool calls
       const content =
         response.content.find((c) => c.type === 'text')?.text || '';
       const toolCalls = response.content
         .filter((c) => {
-          const contentType = (c as any).type;
+          const contentType = c.type;
           return contentType === 'tool_use';
         })
         .map((c) => {
-          const toolUse = c as any;
+          const toolUse = c as Anthropic.Messages.ToolUseBlock;
           return {
-            id:
-              toolUse.id ||
-              `tool-${Math.random().toString(36).substring(2, 11)}`,
+            id: toolUse.id,
             name: toolUse.name,
             content: JSON.stringify(toolUse.input),
           };
@@ -109,6 +175,7 @@ export class AnthropicProvider implements LLMProvider {
       return {
         text: content,
         toolCalls: toolCalls,
+        tokenUsage: tokenUsageFromMessage(response),
       };
     } catch (error) {
       throw new Error(
@@ -117,20 +184,12 @@ export class AnthropicProvider implements LLMProvider {
     }
   }

-  /**
-   * Count tokens in a text using Anthropic's tokenizer
-   * Note: This is a simplified implementation
-   */
-  async countTokens(text: string): Promise<number> {
-    // In a real implementation, you would use Anthropic's tokenizer
-    // This is a simplified approximation
-    return Math.ceil(text.length / 3.5);
-  }
-
   /**
    * Format messages for Anthropic API
    */
-  private formatMessages(messages: Message[]): any[] {
+  private formatMessages(
+    messages: Message[],
+  ): Anthropic.Messages.MessageParam[] {
     // Format messages for Anthropic API
     return messages.map((msg) => {
       if (msg.role === 'user') {
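
Taken together, the helpers place up to four cache breakpoints per request, which is Anthropic's documented maximum: one on the last tool definition (caching the whole tool list before it), one on the system prompt, and one on each of the last two messages. A minimal sketch of the resulting request shape, assuming SDK ^0.37; the model id, tool names, and texts are illustrative, not taken from this commit:

import Anthropic from '@anthropic-ai/sdk';

// Illustrative payload only; the values are made up, the shape mirrors the diff above.
const request: Anthropic.MessageCreateParams = {
  model: 'claude-3-7-sonnet-latest', // hypothetical model id
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are a coding agent...', // system prompt cached as a single block
      cache_control: { type: 'ephemeral' },
    },
  ],
  tools: [
    { name: 'shellExecute', description: 'Run a shell command', input_schema: { type: 'object' } },
    {
      // last tool carries the breakpoint, so the entire tool list is cached
      name: 'readFile',
      description: 'Read a file',
      input_schema: { type: 'object' },
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Fix the failing test', cache_control: { type: 'ephemeral' } },
      ],
    },
  ],
  stream: false,
};

On the first call the marked prefix is written to the cache (reported as cache_creation_input_tokens); on later iterations of the agent loop the unchanged prefix comes back as cache_read_input_tokens at a much lower per-token price, which is what the token caching this PR is named for is after.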

packages/agent/src/core/llm/types.ts (5 additions, 1 deletion)

@@ -2,6 +2,9 @@
  * Core message types for LLM interactions
  */

+import { JsonSchema7Type } from 'zod-to-json-schema';
+
+import { TokenUsage } from '../tokens';
 import { ToolCall } from '../types';

 /**
@@ -67,7 +70,7 @@ export type Message =
 export interface FunctionDefinition {
   name: string;
   description: string;
-  parameters: Record<string, any>; // JSON Schema object
+  parameters: JsonSchema7Type; // JSON Schema object
 }

 /**
@@ -76,6 +79,7 @@ export interface FunctionDefinition {
 export interface LLMResponse {
   text: string;
   toolCalls: ToolCall[];
+  tokenUsage: TokenUsage;
 }

 /**
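
tokens.ts itself is absent from this diff, so the exact shape of TokenUsage is an inference. From tokenUsageFromMessage above and the tracker call below, a plausible sketch (field and method names inferred from usage in this PR, not confirmed by the commit):

// Hypothetical reconstruction of the TokenUsage class this diff relies on.
export class TokenUsage {
  input = 0; // uncached input tokens, billed at the normal rate
  cacheWrites = 0; // input tokens written to the prompt cache
  cacheReads = 0; // input tokens served back from the prompt cache
  output = 0; // generated tokens

  // Accumulate another record, as tokenTracker.tokenUsage.add(tokenUsage) does below.
  add(other: TokenUsage): void {
    this.input += other.input;
    this.cacheWrites += other.cacheWrites;
    this.cacheReads += other.cacheReads;
    this.output += other.output;
  }
}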

packages/agent/src/core/toolAgent/toolAgentCore.ts (6 additions, 1 deletion)

@@ -76,7 +76,12 @@ export const toolAgent = async (
       maxTokens: config.maxTokens,
     };

-    const { text, toolCalls } = await generateText(provider, generateOptions);
+    const { text, toolCalls, tokenUsage } = await generateText(
+      provider,
+      generateOptions,
+    );
+
+    tokenTracker.tokenUsage.add(tokenUsage);

     if (!text.length && toolCalls.length === 0) {
       // Only consider it empty if there's no text AND no tool calls
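
With usage flowing back per call, each iteration of the agent loop adds its own numbers into the shared tracker. An illustrative view of what caching does to that tally over two iterations (numbers are made up; assumes the TokenUsage sketch above):

import { TokenUsage } from '../tokens.js'; // path assumed, matching the imports above

// Illustrative only: how the tally evolves once the prompt prefix is cached.
const tracker = new TokenUsage();

// Iteration 1: nothing is cached yet, so the marked prefix is a cache write.
const first = new TokenUsage();
first.cacheWrites = 4000; // system prompt + tool list + conversation so far
first.output = 150;
tracker.add(first);

// Iteration 2: the unchanged prefix is read back from cache;
// only the new turn is billed as fresh input.
const second = new TokenUsage();
second.cacheReads = 4000;
second.input = 300;
second.output = 120;
tracker.add(second);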
