Commit bfad30f

Make model context window optional (Issue #362)
1 parent 3b7a93d commit bfad30f

6 files changed: 53 additions & 89 deletions

packages/agent/src/core/llm/providers/anthropic.ts

Lines changed: 17 additions & 59 deletions
@@ -12,8 +12,18 @@ import {
   ProviderOptions,
 } from '../types.js';

-// Cache for model context window sizes
-const modelContextWindowCache: Record<string, number> = {};
+const ANTHROPIC_CONTEXT_WINDOWS: Record<string, number> = {
+  'claude-3-7-sonnet-20250219': 200000,
+  'claude-3-7-sonnet-latest': 200000,
+  'claude-3-5-sonnet-20241022': 200000,
+  'claude-3-5-sonnet-latest': 200000,
+  'claude-3-haiku-20240307': 200000,
+  'claude-3-opus-20240229': 200000,
+  'claude-3-sonnet-20240229': 200000,
+  'claude-2.1': 100000,
+  'claude-2.0': 100000,
+  'claude-instant-1.2': 100000,
+};

 /**
  * Anthropic-specific options
@@ -87,7 +97,7 @@ function addCacheControlToMessages(
 function tokenUsageFromMessage(
   message: Anthropic.Message,
   model: string,
-  contextWindow: number,
+  contextWindow: number | undefined,
 ) {
   const usage = new TokenUsage();
   usage.input = message.usage.input_tokens;
@@ -100,7 +110,7 @@ function tokenUsageFromMessage(
   return {
     usage,
     totalTokens,
-    maxTokens: contextWindow,
+    contextWindow,
   };
 }

@@ -131,64 +141,12 @@ export class AnthropicProvider implements LLMProvider {
     });
   }

-  /**
-   * Fetches the model context window size from the Anthropic API
-   *
-   * @returns The context window size
-   * @throws Error if the context window size cannot be determined
-   */
-  private async getModelContextWindow(): Promise<number> {
-    const cachedContextWindow = modelContextWindowCache[this.model];
-    if (cachedContextWindow !== undefined) {
-      return cachedContextWindow;
-    }
-    const response = await this.client.models.list();
-
-    if (!response?.data || !Array.isArray(response.data)) {
-      throw new Error(`Invalid response from models.list() for ${this.model}`);
-    }
-
-    // Try to find the exact model
-    let model = response.data.find((m) => m.id === this.model);
-
-    // If not found, try to find a model that starts with the same name
-    // This helps with model aliases like 'claude-3-sonnet-latest'
-    if (!model) {
-      // Split by '-latest' or '-20' to get the base model name
-      const parts = this.model.split('-latest');
-      const modelPrefix =
-        parts.length > 1 ? parts[0] : this.model.split('-20')[0];
-
-      if (modelPrefix) {
-        model = response.data.find((m) => m.id.startsWith(modelPrefix));
-
-        if (model) {
-          console.info(
-            `Model ${this.model} not found, using ${model.id} for context window size`,
-          );
-        }
-      }
-    }
-
-    // Using type assertion to access context_window property
-    // The Anthropic API returns context_window but it may not be in the TypeScript definitions
-    if (model && 'context_window' in model) {
-      const contextWindow = (model as any).context_window;
-      // Cache the result for future use
-      modelContextWindowCache[this.model] = contextWindow;
-      return contextWindow;
-    } else {
-      throw new Error(
-        `No context window information found for model: ${this.model}`,
-      );
-    }
-  }
-
   /**
    * Generate text using Anthropic API
    */
   async generateText(options: GenerateOptions): Promise<LLMResponse> {
-    const modelContextWindow = await this.getModelContextWindow();
+    const modelContextWindow = ANTHROPIC_CONTEXT_WINDOWS[this.model];
+
     const { messages, functions, temperature = 0.7, maxTokens, topP } = options;

     // Extract system message
@@ -252,7 +210,7 @@ export class AnthropicProvider implements LLMProvider {
       toolCalls: toolCalls,
       tokenUsage: tokenInfo.usage,
       totalTokens: tokenInfo.totalTokens,
-      maxTokens: tokenInfo.maxTokens,
+      contextWindow: tokenInfo.contextWindow,
     };
   }

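With the models.list() round-trip removed, the Anthropic window is a static lookup that simply yields undefined for models not in the table, instead of throwing. A minimal sketch of the new behaviour (the unlisted model name below is hypothetical, and the table is abbreviated):

// Abbreviated copy of the lookup table used above.
const ANTHROPIC_CONTEXT_WINDOWS: Record<string, number> = {
  'claude-3-5-sonnet-latest': 200000,
  'claude-2.1': 100000,
};

// Listed model: a concrete window size.
const knownWindow = ANTHROPIC_CONTEXT_WINDOWS['claude-3-5-sonnet-latest']; // 200000
// Unlisted (hypothetical) model: undefined, with no API call and no thrown error.
const unknownWindow = ANTHROPIC_CONTEXT_WINDOWS['claude-hypothetical-model']; // undefined
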
packages/agent/src/core/llm/providers/ollama.ts

Lines changed: 9 additions & 8 deletions
@@ -24,8 +24,7 @@ import {

 // Define model context window sizes for Ollama models
 // These are approximate and may vary based on specific model configurations
-const OLLAMA_MODEL_LIMITS: Record<string, number> = {
-  default: 4096,
+const OLLAMA_CONTEXT_WINDOWS: Record<string, number> = {
   llama2: 4096,
   'llama2-uncensored': 4096,
   'llama2:13b': 4096,
@@ -136,19 +135,21 @@ export class OllamaProvider implements LLMProvider {
     const totalTokens = tokenUsage.input + tokenUsage.output;

     // Extract the base model name without specific parameters
-    const baseModelName = this.model.split(':')[0];
     // Check if model exists in limits, otherwise use base model or default
-    const modelMaxTokens =
-      OLLAMA_MODEL_LIMITS[this.model] ||
-      (baseModelName ? OLLAMA_MODEL_LIMITS[baseModelName] : undefined) ||
-      4096; // Default fallback
+    let contextWindow = OLLAMA_CONTEXT_WINDOWS[this.model];
+    if (!contextWindow) {
+      const baseModelName = this.model.split(':')[0];
+      if (baseModelName) {
+        contextWindow = OLLAMA_CONTEXT_WINDOWS[baseModelName];
+      }
+    }

     return {
       text: content,
       toolCalls: toolCalls,
       tokenUsage: tokenUsage,
       totalTokens,
-      maxTokens: modelMaxTokens,
+      contextWindow,
     };
   }

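The Ollama lookup tries the exact model name first and then strips the tag after ':', so tagged variants of a known base model still resolve; anything else is left undefined rather than defaulting to 4096. A minimal standalone sketch of that lookup (the wrapper function and the tagged model name in the comment are illustrative, not part of the commit):

// Abbreviated table for the sketch.
const OLLAMA_CONTEXT_WINDOWS: Record<string, number> = { llama2: 4096 };

function resolveOllamaContextWindow(model: string): number | undefined {
  let contextWindow = OLLAMA_CONTEXT_WINDOWS[model];
  if (!contextWindow) {
    const baseModelName = model.split(':')[0]; // e.g. 'llama2:13b-custom' -> 'llama2'
    if (baseModelName) {
      contextWindow = OLLAMA_CONTEXT_WINDOWS[baseModelName];
    }
  }
  // undefined when neither the full name nor the base name is listed.
  return contextWindow;
}
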
packages/agent/src/core/llm/providers/openai.ts

Lines changed: 3 additions & 4 deletions
@@ -20,8 +20,7 @@ import type {
 } from 'openai/resources/chat';

 // Define model context window sizes for OpenAI models
-const OPENAI_MODEL_LIMITS: Record<string, number> = {
-  default: 128000,
+const OPENA_CONTEXT_WINDOWS: Record<string, number> = {
   'o3-mini': 200000,
   'o1-pro': 200000,
   o1: 200000,
@@ -136,14 +135,14 @@ export class OpenAIProvider implements LLMProvider {

       // Calculate total tokens and get max tokens for the model
       const totalTokens = tokenUsage.input + tokenUsage.output;
-      const modelMaxTokens = OPENAI_MODEL_LIMITS[this.model] || 8192; // Default fallback
+      const contextWindow = OPENA_CONTEXT_WINDOWS[this.model];

       return {
         text: content,
         toolCalls,
         tokenUsage,
         totalTokens,
-        maxTokens: modelMaxTokens,
+        contextWindow,
       };
     } catch (error) {
       throw new Error(`Error calling OpenAI API: ${(error as Error).message}`);

packages/agent/src/core/llm/types.ts

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ export interface LLMResponse {
   tokenUsage: TokenUsage;
   // Add new fields for context window tracking
   totalTokens?: number; // Total tokens used in this request
-  maxTokens?: number; // Maximum allowed tokens for this model
+  contextWindow?: number; // Maximum allowed tokens for this model
 }

 /**

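Because the renamed field stays optional on LLMResponse, callers have to narrow it before computing a usage ratio. A minimal sketch of that guard (the helper name and its inline structural type are made up for illustration, not part of the commit):

// Hypothetical helper: yields a usage ratio only when the context window is known.
function usageRatio(response: {
  totalTokens?: number;
  contextWindow?: number;
}): number | undefined {
  const { totalTokens, contextWindow } = response;
  if (totalTokens === undefined || contextWindow === undefined) {
    return undefined;
  }
  return totalTokens / contextWindow;
}
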
packages/agent/src/core/toolAgent/statusUpdates.ts

Lines changed: 11 additions & 6 deletions
@@ -14,12 +14,14 @@ import { ToolContext } from '../types.js';
  */
 export function generateStatusUpdate(
   totalTokens: number,
-  maxTokens: number,
+  contextWindow: number | undefined,
   tokenTracker: TokenTracker,
   context: ToolContext,
 ): Message {
   // Calculate token usage percentage
-  const usagePercentage = Math.round((totalTokens / maxTokens) * 100);
+  const usagePercentage = contextWindow
+    ? Math.round((totalTokens / contextWindow) * 100)
+    : undefined;

   // Get active sub-agents
   const activeAgents = context.agentTracker ? getActiveAgents(context) : [];
@@ -35,7 +37,9 @@
   // Format the status message
   const statusContent = [
     `--- STATUS UPDATE ---`,
-    `Token Usage: ${formatNumber(totalTokens)}/${formatNumber(maxTokens)} (${usagePercentage}%)`,
+    contextWindow !== undefined
+      ? `Token Usage: ${formatNumber(totalTokens)}/${formatNumber(contextWindow)} (${usagePercentage}%)`
+      : '',
     `Cost So Far: ${tokenTracker.getTotalCost()}`,
     ``,
     `Active Sub-Agents: ${activeAgents.length}`,
@@ -47,9 +51,10 @@
     `Active Browser Sessions: ${activeSessions.length}`,
     ...activeSessions.map((s) => `- ${s.id}: ${s.description}`),
     ``,
-    usagePercentage >= 50
-      ? `Your token usage is high (${usagePercentage}%). It is recommended to use the 'compactHistory' tool now to reduce context size.`
-      : `If token usage gets high (>50%), consider using the 'compactHistory' tool to reduce context size.`,
+    usagePercentage !== undefined &&
+      (usagePercentage >= 50
+        ? `Your token usage is high (${usagePercentage}%). It is recommended to use the 'compactHistory' tool now to reduce context size.`
+        : `If token usage gets high (>50%), consider using the 'compactHistory' tool to reduce context size.`),
     `--- END STATUS ---`,
   ].join('\n');

packages/agent/src/core/toolAgent/toolAgentCore.ts

Lines changed: 12 additions & 11 deletions
@@ -151,34 +151,35 @@ export const toolAgent = async (
       maxTokens: localContext.maxTokens,
     };

-    const { text, toolCalls, tokenUsage, totalTokens, maxTokens } =
+    const { text, toolCalls, tokenUsage, totalTokens, contextWindow } =
       await generateText(provider, generateOptions);

     tokenTracker.tokenUsage.add(tokenUsage);

     // Send status updates based on frequency and token usage threshold
     statusUpdateCounter++;
-    if (totalTokens && maxTokens) {
-      const usagePercentage = Math.round((totalTokens / maxTokens) * 100);
-      const shouldSendByFrequency =
-        statusUpdateCounter >= STATUS_UPDATE_FREQUENCY;
-      const shouldSendByUsage = usagePercentage >= TOKEN_USAGE_THRESHOLD;
+    if (totalTokens) {
+      let statusTriggered = false;
+      statusTriggered ||= statusUpdateCounter >= STATUS_UPDATE_FREQUENCY;
+
+      if (contextWindow) {
+        const usagePercentage = Math.round((totalTokens / contextWindow) * 100);
+        statusTriggered ||= usagePercentage >= TOKEN_USAGE_THRESHOLD;
+      }

       // Send status update if either condition is met
-      if (shouldSendByFrequency || shouldSendByUsage) {
+      if (statusTriggered) {
        statusUpdateCounter = 0;

        const statusMessage = generateStatusUpdate(
          totalTokens,
-          maxTokens,
+          contextWindow,
          tokenTracker,
          localContext,
        );

        messages.push(statusMessage);
-        logger.debug(
-          `Sent status update to agent (token usage: ${usagePercentage}%)`,
-        );
+        logger.debug(`Sent status update to agent`);
      }
    }

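The status-update trigger now works even when a provider cannot report a window: the frequency check always applies, and the usage-percentage check is layered on only when contextWindow is present. A minimal standalone sketch of that decision (the constant values are assumptions for the sketch, not taken from this commit):

const STATUS_UPDATE_FREQUENCY = 5; // assumed: send at least every 5 generations
const TOKEN_USAGE_THRESHOLD = 50; // assumed: or once usage reaches 50%

function shouldSendStatusUpdate(
  statusUpdateCounter: number,
  totalTokens: number,
  contextWindow?: number,
): boolean {
  // Frequency-based trigger applies regardless of context-window knowledge.
  let statusTriggered = statusUpdateCounter >= STATUS_UPDATE_FREQUENCY;
  // Usage-based trigger only when the window is known.
  if (contextWindow) {
    const usagePercentage = Math.round((totalTokens / contextWindow) * 100);
    statusTriggered ||= usagePercentage >= TOKEN_USAGE_THRESHOLD;
  }
  return statusTriggered;
}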