
Commit 38c1349

Update LLM providers and constants (#1937)
- Updated constants in app/lib/.server/llm/constants.ts
- Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
- Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
- Modified GitHub provider in app/lib/modules/llm/providers/github.ts
- Updated Google provider in app/lib/modules/llm/providers/google.ts
- Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
- Updated LLM types in app/lib/modules/llm/types.ts
- Modified API route in app/routes/api.llmcall.ts
1 parent b5d9055 commit 38c1349

File tree

8 files changed: 251 additions & 26 deletions


app/lib/.server/llm/constants.ts

Lines changed: 38 additions & 0 deletions
@@ -4,6 +4,44 @@
  */
 export const MAX_TOKENS = 32000;
 
+/*
+ * Provider-specific default completion token limits
+ * Used as fallbacks when model doesn't specify maxCompletionTokens
+ */
+export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
+  OpenAI: 16384,
+  Github: 16384, // GitHub Models use OpenAI-compatible limits
+  Anthropic: 128000,
+  Google: 32768,
+  Cohere: 4000,
+  DeepSeek: 8192,
+  Groq: 8192,
+  HuggingFace: 4096,
+  Mistral: 8192,
+  Ollama: 8192,
+  OpenRouter: 8192,
+  Perplexity: 8192,
+  Together: 8192,
+  xAI: 8192,
+  LMStudio: 8192,
+  OpenAILike: 8192,
+  AmazonBedrock: 8192,
+  Hyperbolic: 8192,
+};
+
+/*
+ * Reasoning models that require maxCompletionTokens instead of maxTokens
+ * These models use internal reasoning tokens and have different API parameter requirements
+ */
+export function isReasoningModel(modelName: string): boolean {
+  const result = /^(o1|o3|gpt-5)/i.test(modelName);
+
+  // DEBUG: Test regex matching
+  console.log(`REGEX TEST: "${modelName}" matches reasoning pattern: ${result}`);
+
+  return result;
+}
+
 // limits the number of model responses that can be returned in a single request
 export const MAX_RESPONSE_SEGMENTS = 2;
 
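For reference, a minimal sketch of how these new exports are meant to be consumed; the model object is hypothetical, and the fallback chain only approximates getCompletionTokenLimit in stream-text.ts below:

```ts
import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel } from './constants';

// Hypothetical model entry for illustration; real entries come from the provider modules
const model = { name: 'o1-mini', provider: 'Github', maxCompletionTokens: undefined as number | undefined };

// Resolution order: explicit per-model limit, then provider default, then a capped MAX_TOKENS
const completionLimit =
  model.maxCompletionTokens ?? PROVIDER_COMPLETION_LIMITS[model.provider] ?? Math.min(MAX_TOKENS, 16384);

// o1/o3/gpt-5 prefixed names are treated as reasoning models and receive maxCompletionTokens
console.log(isReasoningModel(model.name), completionLimit); // true 16384
```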

app/lib/.server/llm/stream-text.ts

Lines changed: 91 additions & 8 deletions
@@ -1,5 +1,5 @@
 import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
-import { MAX_TOKENS, type FileMap } from './constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel, type FileMap } from './constants';
 import { getSystemPrompt } from '~/lib/common/prompts/prompts';
 import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
 import type { IProviderSetting } from '~/types/model';

@@ -26,6 +26,23 @@ export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0]
 
 const logger = createScopedLogger('stream-text');
 
+function getCompletionTokenLimit(modelDetails: any): number {
+  // 1. If model specifies completion tokens, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Use provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, but cap at reasonable limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 function sanitizeText(text: string): string {
   let sanitized = text.replace(/<div class=\\"__boltThought__\\">.*?<\/div>/s, '');
   sanitized = sanitized.replace(/<think>.*?<\/think>/s, '');

@@ -123,10 +140,10 @@ export async function streamText(props: {
     }
   }
 
-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
-  // Ensure we never exceed reasonable token limits to prevent API errors
-  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety
+  // Additional safety cap - should not be needed with proper completion limits, but kept for safety
+  const safeMaxTokens = Math.min(dynamicMaxTokens, 128000);
 
   logger.info(
     `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,

@@ -204,18 +221,84 @@ export async function streamText(props: {
 
   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
 
+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
   // console.log(systemPrompt, processedMessages);
 
-  return await _streamText({
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const filteredOptions =
+    isReasoning && options
+      ? Object.fromEntries(
+          Object.entries(options).filter(
+            ([key]) =>
+              ![
+                'temperature',
+                'topP',
+                'presencePenalty',
+                'frequencyPenalty',
+                'logprobs',
+                'topLogprobs',
+                'logitBias',
+              ].includes(key),
+          ),
+        )
+      : options || {};
+
+  // DEBUG: Log filtered options
+  logger.info(
+    `DEBUG STREAM: Options filtering for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        originalOptions: options || {},
+        filteredOptions,
+        originalOptionsKeys: options ? Object.keys(options) : [],
+        filteredOptionsKeys: Object.keys(filteredOptions),
+        removedParams: options ? Object.keys(options).filter((key) => !(key in filteredOptions)) : [],
+      },
+      null,
+      2,
+    ),
+  );
+
+  const streamParams = {
     model: provider.getModelInstance({
       model: modelDetails.name,
      serverEnv,
      apiKeys,
      providerSettings,
    }),
    system: chatMode === 'build' ? systemPrompt : discussPrompt(),
-    maxTokens: safeMaxTokens,
+    ...tokenParams,
    messages: convertToCoreMessages(processedMessages as any),
-    ...options,
-  });
+    ...filteredOptions,
+
+    // Set temperature to 1 for reasoning models (required by OpenAI API)
+    ...(isReasoning ? { temperature: 1 } : {}),
+  };
+
+  // DEBUG: Log final streaming parameters
+  logger.info(
+    `DEBUG STREAM: Final streaming params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        hasTemperature: 'temperature' in streamParams,
+        hasMaxTokens: 'maxTokens' in streamParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in streamParams,
+        paramKeys: Object.keys(streamParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        streamParams: Object.fromEntries(
+          Object.entries(streamParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
    ),
  );
+
+  return await _streamText(streamParams);
 }
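A condensed sketch of the branching this diff introduces; buildTokenParams is a hypothetical helper (the real code inlines this logic inside streamText), but the blocked-parameter list and the temperature override mirror the diff:

```ts
// Simplified illustration of the reasoning-model branch (not the real streamText signature)
function buildTokenParams(modelName: string, safeMaxTokens: number, options: Record<string, unknown> = {}) {
  const isReasoning = /^(o1|o3|gpt-5)/i.test(modelName);

  // Reasoning models: maxCompletionTokens, sampling params stripped, temperature pinned to 1
  if (isReasoning) {
    const blocked = ['temperature', 'topP', 'presencePenalty', 'frequencyPenalty', 'logprobs', 'topLogprobs', 'logitBias'];
    const filtered = Object.fromEntries(Object.entries(options).filter(([key]) => !blocked.includes(key)));
    return { maxCompletionTokens: safeMaxTokens, ...filtered, temperature: 1 };
  }

  // Traditional models: keep maxTokens and caller-supplied options as-is
  return { maxTokens: safeMaxTokens, ...options };
}

// e.g. buildTokenParams('o1-mini', 8192, { temperature: 0.2 }) → { maxCompletionTokens: 8192, temperature: 1 }
```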

app/lib/modules/llm/providers/anthropic.ts

Lines changed: 3 additions & 0 deletions
@@ -22,6 +22,7 @@ export default class AnthropicProvider extends BaseProvider {
       label: 'Claude 3.5 Sonnet',
       provider: 'Anthropic',
       maxTokenAllowed: 200000,
+      maxCompletionTokens: 128000,
     },
 
     // Claude 3 Haiku: 200k context, fastest and most cost-effective
@@ -30,6 +31,7 @@ export default class AnthropicProvider extends BaseProvider {
       label: 'Claude 3 Haiku',
       provider: 'Anthropic',
       maxTokenAllowed: 200000,
+      maxCompletionTokens: 128000,
     },
   ];
 
@@ -84,6 +86,7 @@ export default class AnthropicProvider extends BaseProvider {
         label: `${m.display_name} (${Math.floor(contextWindow / 1000)}k context)`,
         provider: this.name,
         maxTokenAllowed: contextWindow,
+        maxCompletionTokens: 128000, // Claude models support up to 128k completion tokens
       };
     });
   }

app/lib/modules/llm/providers/github.ts

Lines changed: 25 additions & 7 deletions
@@ -14,13 +14,31 @@ export default class GithubProvider extends BaseProvider {
 
   // find more in https://github.com/marketplace?type=models
   staticModels: ModelInfo[] = [
-    { name: 'gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'o1', label: 'o1-preview', provider: 'Github', maxTokenAllowed: 100000 },
-    { name: 'o1-mini', label: 'o1-mini', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4o-mini', label: 'GPT-4o Mini', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4-turbo', label: 'GPT-4 Turbo', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4', label: 'GPT-4', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'Github', maxTokenAllowed: 8000 },
+    { name: 'gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 128000, maxCompletionTokens: 16384 },
+    { name: 'o1', label: 'o1-preview', provider: 'Github', maxTokenAllowed: 100000, maxCompletionTokens: 16384 },
+    { name: 'o1-mini', label: 'o1-mini', provider: 'Github', maxTokenAllowed: 65536, maxCompletionTokens: 8192 },
+    {
+      name: 'gpt-4o-mini',
+      label: 'GPT-4o Mini',
+      provider: 'Github',
+      maxTokenAllowed: 128000,
+      maxCompletionTokens: 16384,
+    },
+    {
+      name: 'gpt-4-turbo',
+      label: 'GPT-4 Turbo',
+      provider: 'Github',
+      maxTokenAllowed: 128000,
+      maxCompletionTokens: 8192,
+    },
+    { name: 'gpt-4', label: 'GPT-4', provider: 'Github', maxTokenAllowed: 8192, maxCompletionTokens: 8192 },
+    {
+      name: 'gpt-3.5-turbo',
+      label: 'GPT-3.5 Turbo',
+      provider: 'Github',
+      maxTokenAllowed: 16385,
+      maxCompletionTokens: 4096,
+    },
   ];
 
   getModelInstance(options: {

app/lib/modules/llm/providers/google.ts

Lines changed: 22 additions & 2 deletions
@@ -17,10 +17,22 @@ export default class GoogleProvider extends BaseProvider {
      * Essential fallback models - only the most reliable/stable ones
      * Gemini 1.5 Pro: 2M context, excellent for complex reasoning and large codebases
      */
-    { name: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro', provider: 'Google', maxTokenAllowed: 2000000 },
+    {
+      name: 'gemini-1.5-pro',
+      label: 'Gemini 1.5 Pro',
+      provider: 'Google',
+      maxTokenAllowed: 2000000,
+      maxCompletionTokens: 32768,
+    },
 
     // Gemini 1.5 Flash: 1M context, fast and cost-effective
-    { name: 'gemini-1.5-flash', label: 'Gemini 1.5 Flash', provider: 'Google', maxTokenAllowed: 1000000 },
+    {
+      name: 'gemini-1.5-flash',
+      label: 'Gemini 1.5 Flash',
+      provider: 'Google',
+      maxTokenAllowed: 1000000,
+      maxCompletionTokens: 32768,
+    },
   ];
 
   async getDynamicModels(

@@ -89,11 +101,19 @@ export default class GoogleProvider extends BaseProvider {
       const maxAllowed = 2000000; // 2M tokens max
       const finalContext = Math.min(contextWindow, maxAllowed);
 
+      // Get completion token limit from Google API
+      let completionTokens = 32768; // default fallback
+
+      if (m.outputTokenLimit && m.outputTokenLimit > 0) {
+        completionTokens = Math.min(m.outputTokenLimit, 128000); // Cap at reasonable limit
+      }
+
       return {
         name: modelName,
         label: `${m.displayName} (${finalContext >= 1000000 ? Math.floor(finalContext / 1000000) + 'M' : Math.floor(finalContext / 1000) + 'k'} context)`,
         provider: this.name,
         maxTokenAllowed: finalContext,
+        maxCompletionTokens: completionTokens,
       };
     });
   }

app/lib/modules/llm/providers/openai.ts

Lines changed: 8 additions & 2 deletions
@@ -17,10 +17,16 @@ export default class OpenAIProvider extends BaseProvider {
      * Essential fallback models - only the most stable/reliable ones
      * GPT-4o: 128k context, high performance, recommended for most tasks
      */
-    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000 },
+    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000, maxCompletionTokens: 16384 },
 
     // GPT-3.5-turbo: 16k context, fast and cost-effective
-    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'OpenAI', maxTokenAllowed: 16000 },
+    {
+      name: 'gpt-3.5-turbo',
+      label: 'GPT-3.5 Turbo',
+      provider: 'OpenAI',
+      maxTokenAllowed: 16000,
+      maxCompletionTokens: 4096,
+    },
   ];
 
   async getDynamicModels(

app/lib/modules/llm/types.ts

Lines changed: 5 additions & 0 deletions
@@ -5,7 +5,12 @@ export interface ModelInfo {
   name: string;
   label: string;
   provider: string;
+
+  /** Maximum context window size (input tokens) - how many tokens the model can process */
   maxTokenAllowed: number;
+
+  /** Maximum completion/output tokens - how many tokens the model can generate. If not specified, falls back to provider defaults */
+  maxCompletionTokens?: number;
 }
 
 export interface ProviderInfo {
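As an illustration of the extended interface, a static model entry can now declare both limits. The values below mirror the OpenAI gpt-4o entry above; the import path is assumed:

```ts
import type { ModelInfo } from '~/lib/modules/llm/types';

const gpt4o: ModelInfo = {
  name: 'gpt-4o',
  label: 'GPT-4o',
  provider: 'OpenAI',
  maxTokenAllowed: 128000, // context window (input side)
  maxCompletionTokens: 16384, // output budget; when omitted, PROVIDER_COMPLETION_LIMITS supplies the provider default
};
```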

0 commit comments
