7 changes: 5 additions & 2 deletions app/lib/.server/llm/constants.ts
@@ -1,5 +1,8 @@
-// see https://docs.anthropic.com/en/docs/about-claude/models
-export const MAX_TOKENS = 8000;
+/*
+ * Maximum tokens for response generation (conservative default for older models)
+ * Modern models can handle much higher limits - specific limits are set per model
+ */
+export const MAX_TOKENS = 32000;
 
 // limits the number of model responses that can be returned in a single request
 export const MAX_RESPONSE_SEGMENTS = 2;
17 changes: 14 additions & 3 deletions app/lib/.server/llm/stream-text.ts
@@ -108,7 +108,14 @@ export async function streamText(props: {
   modelDetails = modelsList.find((m) => m.name === currentModel);
 
   if (!modelDetails) {
-    // Fallback to first model
+    // Check if it's a Google provider and the model name looks like it might be incorrect
+    if (provider.name === 'Google' && currentModel.includes('2.5')) {
+      throw new Error(
+        `Model "${currentModel}" not found. Gemini 2.5 Pro doesn't exist. Available Gemini models include: gemini-1.5-pro, gemini-2.0-flash, gemini-1.5-flash. Please select a valid model.`,
+      );
+    }
+
+    // Fallback to first model with warning
     logger.warn(
       `MODEL [${currentModel}] not found in provider [${provider.name}]. Falling back to first model. ${modelsList[0].name}`,
     );
@@ -117,8 +124,12 @@
   }
 
   const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+
+  // Ensure we never exceed reasonable token limits to prevent API errors
+  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety
+
   logger.info(
-    `Max tokens for model ${modelDetails.name} is ${dynamicMaxTokens} based on ${modelDetails.maxTokenAllowed} or ${MAX_TOKENS}`,
+    `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
   );
 
   let systemPrompt =
@@ -203,7 +214,7 @@
       providerSettings,
     }),
     system: chatMode === 'build' ? systemPrompt : discussPrompt(),
-    maxTokens: dynamicMaxTokens,
+    maxTokens: safeMaxTokens,
     messages: convertToCoreMessages(processedMessages as any),
     ...options,
   });
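To make the new capping behavior concrete, here is a minimal sketch of the resolution order as a standalone helper. resolveMaxTokens and SAFE_CAP are illustrative names, not identifiers from this PR, and ModelInfo is reduced to the one field used here:

interface ModelInfo {
  name: string;
  maxTokenAllowed?: number;
}

const MAX_TOKENS = 32000; // fallback default from constants.ts
const SAFE_CAP = 100000; // the 100k safety cap added in stream-text.ts

function resolveMaxTokens(modelDetails?: ModelInfo): number {
  // Per-model limit wins when known; otherwise fall back to the constant
  const dynamicMaxTokens = modelDetails?.maxTokenAllowed ?? MAX_TOKENS;

  // Never request more than the cap, even if a provider reports a 1M+ context window
  return Math.min(dynamicMaxTokens, SAFE_CAP);
}

// resolveMaxTokens({ name: 'gemini-1.5-pro', maxTokenAllowed: 2000000 }) -> 100000
// resolveMaxTokens({ name: 'claude-3-haiku-20240307', maxTokenAllowed: 200000 }) -> 100000
// resolveMaxTokens(undefined) -> 32000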
63 changes: 36 additions & 27 deletions app/lib/modules/llm/providers/anthropic.ts
@@ -13,33 +13,24 @@ export default class AnthropicProvider extends BaseProvider {
   };
 
   staticModels: ModelInfo[] = [
+    /*
+     * Essential fallback models - only the most stable/reliable ones
+     * Claude 3.5 Sonnet: 200k context, excellent for complex reasoning and coding
+     */
     {
-      name: 'claude-3-7-sonnet-20250219',
-      label: 'Claude 3.7 Sonnet',
+      name: 'claude-3-5-sonnet-20241022',
+      label: 'Claude 3.5 Sonnet',
       provider: 'Anthropic',
-      maxTokenAllowed: 128000,
-    },
-    {
-      name: 'claude-3-5-sonnet-latest',
-      label: 'Claude 3.5 Sonnet (new)',
-      provider: 'Anthropic',
-      maxTokenAllowed: 8000,
-    },
-    {
-      name: 'claude-3-5-sonnet-20240620',
-      label: 'Claude 3.5 Sonnet (old)',
-      provider: 'Anthropic',
-      maxTokenAllowed: 8000,
+      maxTokenAllowed: 200000,
     },
+
+    // Claude 3 Haiku: 200k context, fastest and most cost-effective
     {
-      name: 'claude-3-5-haiku-latest',
-      label: 'Claude 3.5 Haiku (new)',
+      name: 'claude-3-haiku-20240307',
+      label: 'Claude 3 Haiku',
       provider: 'Anthropic',
-      maxTokenAllowed: 8000,
+      maxTokenAllowed: 200000,
     },
-    { name: 'claude-3-opus-latest', label: 'Claude 3 Opus', provider: 'Anthropic', maxTokenAllowed: 8000 },
-    { name: 'claude-3-sonnet-20240229', label: 'Claude 3 Sonnet', provider: 'Anthropic', maxTokenAllowed: 8000 },
-    { name: 'claude-3-haiku-20240307', label: 'Claude 3 Haiku', provider: 'Anthropic', maxTokenAllowed: 8000 },
   ];
 
   async getDynamicModels(
@@ -71,12 +62,30 @@ export default class AnthropicProvider extends BaseProvider {

     const data = res.data.filter((model: any) => model.type === 'model' && !staticModelIds.includes(model.id));
 
-    return data.map((m: any) => ({
-      name: m.id,
-      label: `${m.display_name}`,
-      provider: this.name,
-      maxTokenAllowed: 32000,
-    }));
+    return data.map((m: any) => {
+      // Get accurate context window from Anthropic API
+      let contextWindow = 32000; // default fallback
+
+      // Anthropic provides max_tokens in their API response
+      if (m.max_tokens) {
+        contextWindow = m.max_tokens;
+      } else if (m.id?.includes('claude-3-5-sonnet')) {
+        contextWindow = 200000; // Claude 3.5 Sonnet has 200k context
+      } else if (m.id?.includes('claude-3-haiku')) {
+        contextWindow = 200000; // Claude 3 Haiku has 200k context
+      } else if (m.id?.includes('claude-3-opus')) {
+        contextWindow = 200000; // Claude 3 Opus has 200k context
+      } else if (m.id?.includes('claude-3-sonnet')) {
+        contextWindow = 200000; // Claude 3 Sonnet has 200k context
+      }
+
+      return {
+        name: m.id,
+        label: `${m.display_name} (${Math.floor(contextWindow / 1000)}k context)`,
+        provider: this.name,
+        maxTokenAllowed: contextWindow,
+      };
+    });
   }
 
   getModelInstance: (options: {
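The else-if chain above reads naturally as a lookup table with a default. A sketch under that framing, assuming (as the diff does) that the models response may or may not carry a max_tokens field; inferClaudeContextWindow and CLAUDE_CONTEXT_WINDOWS are hypothetical names:

const CLAUDE_CONTEXT_WINDOWS: Array<[fragment: string, tokens: number]> = [
  ['claude-3-5-sonnet', 200000],
  ['claude-3-haiku', 200000],
  ['claude-3-opus', 200000],
  ['claude-3-sonnet', 200000],
];

function inferClaudeContextWindow(modelId: string, apiMaxTokens?: number): number {
  // Trust an explicit limit from the API when present
  if (apiMaxTokens) {
    return apiMaxTokens;
  }

  // Otherwise match on the model id, mirroring the else-if chain in the diff
  const match = CLAUDE_CONTEXT_WINDOWS.find(([fragment]) => modelId.includes(fragment));

  return match ? match[1] : 32000; // same default fallback as the diff
}

// inferClaudeContextWindow('claude-3-5-sonnet-20241022') -> 200000
// inferClaudeContextWindow('claude-2.1') -> 32000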
75 changes: 55 additions & 20 deletions app/lib/modules/llm/providers/google.ts
@@ -13,19 +13,14 @@ export default class GoogleProvider extends BaseProvider {
   };
 
   staticModels: ModelInfo[] = [
-    { name: 'gemini-1.5-flash-latest', label: 'Gemini 1.5 Flash', provider: 'Google', maxTokenAllowed: 8192 },
-    {
-      name: 'gemini-2.0-flash-thinking-exp-01-21',
-      label: 'Gemini 2.0 Flash-thinking-exp-01-21',
-      provider: 'Google',
-      maxTokenAllowed: 65536,
-    },
-    { name: 'gemini-2.0-flash-exp', label: 'Gemini 2.0 Flash', provider: 'Google', maxTokenAllowed: 8192 },
-    { name: 'gemini-1.5-flash-002', label: 'Gemini 1.5 Flash-002', provider: 'Google', maxTokenAllowed: 8192 },
-    { name: 'gemini-1.5-flash-8b', label: 'Gemini 1.5 Flash-8b', provider: 'Google', maxTokenAllowed: 8192 },
-    { name: 'gemini-1.5-pro-latest', label: 'Gemini 1.5 Pro', provider: 'Google', maxTokenAllowed: 8192 },
-    { name: 'gemini-1.5-pro-002', label: 'Gemini 1.5 Pro-002', provider: 'Google', maxTokenAllowed: 8192 },
-    { name: 'gemini-exp-1206', label: 'Gemini exp-1206', provider: 'Google', maxTokenAllowed: 8192 },
+    /*
+     * Essential fallback models - only the most reliable/stable ones
+     * Gemini 1.5 Pro: 2M context, excellent for complex reasoning and large codebases
+     */
+    { name: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro', provider: 'Google', maxTokenAllowed: 2000000 },
+
+    // Gemini 1.5 Flash: 1M context, fast and cost-effective
+    { name: 'gemini-1.5-flash', label: 'Gemini 1.5 Flash', provider: 'Google', maxTokenAllowed: 1000000 },
   ];
 
   async getDynamicModels(
@@ -51,16 +46,56 @@
       },
     });
 
+    if (!response.ok) {
+      throw new Error(`Failed to fetch models from Google API: ${response.status} ${response.statusText}`);
+    }
+
     const res = (await response.json()) as any;
 
-    const data = res.models.filter((model: any) => model.outputTokenLimit > 8000);
+    if (!res.models || !Array.isArray(res.models)) {
+      throw new Error('Invalid response format from Google API');
+    }
+
+    // Filter out models with very low token limits and experimental/unstable models
+    const data = res.models.filter((model: any) => {
+      const hasGoodTokenLimit = (model.outputTokenLimit || 0) > 8000;
+      const isStable = !model.name.includes('exp') || model.name.includes('flash-exp');
+
+      return hasGoodTokenLimit && isStable;
+    });
 
-    return data.map((m: any) => ({
-      name: m.name.replace('models/', ''),
-      label: `${m.displayName} - context ${Math.floor((m.inputTokenLimit + m.outputTokenLimit) / 1000) + 'k'}`,
-      provider: this.name,
-      maxTokenAllowed: m.inputTokenLimit + m.outputTokenLimit || 8000,
-    }));
+    return data.map((m: any) => {
+      const modelName = m.name.replace('models/', '');
+
+      // Get accurate context window from Google API
+      let contextWindow = 32000; // default fallback
+
+      if (m.inputTokenLimit && m.outputTokenLimit) {
+        // Use the input limit as the primary context window (typically larger)
+        contextWindow = m.inputTokenLimit;
+      } else if (modelName.includes('gemini-1.5-pro')) {
+        contextWindow = 2000000; // Gemini 1.5 Pro has 2M context
+      } else if (modelName.includes('gemini-1.5-flash')) {
+        contextWindow = 1000000; // Gemini 1.5 Flash has 1M context
+      } else if (modelName.includes('gemini-2.0-flash')) {
+        contextWindow = 1000000; // Gemini 2.0 Flash has 1M context
+      } else if (modelName.includes('gemini-pro')) {
+        contextWindow = 32000; // Gemini Pro has 32k context
+      } else if (modelName.includes('gemini-flash')) {
+        contextWindow = 32000; // Gemini Flash has 32k context
+      }
+
+      // Cap at reasonable limits to prevent issues
+      const maxAllowed = 2000000; // 2M tokens max
+      const finalContext = Math.min(contextWindow, maxAllowed);
+
+      return {
+        name: modelName,
+        label: `${m.displayName} (${finalContext >= 1000000 ? Math.floor(finalContext / 1000000) + 'M' : Math.floor(finalContext / 1000) + 'k'} context)`,
+        provider: this.name,
+        maxTokenAllowed: finalContext,
+      };
+    });
   }
 
   getModelInstance(options: {
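The new filter is the subtle part of this hunk: isStable drops experimental builds except flash-exp variants, and the token check now tolerates a missing outputTokenLimit. A sketch with illustrative records (not real API output) showing which ids survive:

interface GoogleModel {
  name: string;
  outputTokenLimit?: number;
}

function keepModel(model: GoogleModel): boolean {
  const hasGoodTokenLimit = (model.outputTokenLimit || 0) > 8000;

  // Experimental builds are dropped unless they are flash-exp variants
  const isStable = !model.name.includes('exp') || model.name.includes('flash-exp');

  return hasGoodTokenLimit && isStable;
}

// keepModel({ name: 'models/gemini-1.5-pro', outputTokenLimit: 8192 })       -> true
// keepModel({ name: 'models/gemini-exp-1206', outputTokenLimit: 8192 })      -> false (experimental)
// keepModel({ name: 'models/gemini-2.0-flash-exp', outputTokenLimit: 8192 }) -> true  (flash-exp allowed)
// keepModel({ name: 'models/gemini-1.5-flash-8b' })                          -> false (no limit reported)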
19 changes: 10 additions & 9 deletions app/lib/modules/llm/providers/groq.ts
@@ -13,17 +13,18 @@ export default class GroqProvider extends BaseProvider {
   };
 
   staticModels: ModelInfo[] = [
-    { name: 'llama-3.1-8b-instant', label: 'Llama 3.1 8b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
-    { name: 'llama-3.2-11b-vision-preview', label: 'Llama 3.2 11b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
-    { name: 'llama-3.2-90b-vision-preview', label: 'Llama 3.2 90b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
-    { name: 'llama-3.2-3b-preview', label: 'Llama 3.2 3b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
-    { name: 'llama-3.2-1b-preview', label: 'Llama 3.2 1b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
-    { name: 'llama-3.3-70b-versatile', label: 'Llama 3.3 70b (Groq)', provider: 'Groq', maxTokenAllowed: 8000 },
+    /*
+     * Essential fallback models - only the most stable/reliable ones
+     * Llama 3.1 8B: 128k context, fast and efficient
+     */
+    { name: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B', provider: 'Groq', maxTokenAllowed: 128000 },
+
+    // Llama 3.3 70B: 128k context, most capable model
     {
-      name: 'deepseek-r1-distill-llama-70b',
-      label: 'Deepseek R1 Distill Llama 70b (Groq)',
+      name: 'llama-3.3-70b-versatile',
+      label: 'Llama 3.3 70B',
       provider: 'Groq',
-      maxTokenAllowed: 131072,
+      maxTokenAllowed: 128000,
     },
   ];
 
69 changes: 26 additions & 43 deletions app/lib/modules/llm/providers/open-router.ts
@@ -27,50 +27,24 @@ export default class OpenRouterProvider extends BaseProvider {
   };
 
   staticModels: ModelInfo[] = [
+    /*
+     * Essential fallback models - only the most stable/reliable ones
+     * Claude 3.5 Sonnet via OpenRouter: 200k context
+     */
     {
       name: 'anthropic/claude-3.5-sonnet',
-      label: 'Anthropic: Claude 3.5 Sonnet (OpenRouter)',
+      label: 'Claude 3.5 Sonnet',
       provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
+      maxTokenAllowed: 200000,
     },
-    {
-      name: 'anthropic/claude-3-haiku',
-      label: 'Anthropic: Claude 3 Haiku (OpenRouter)',
-      provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
-    },
-    {
-      name: 'deepseek/deepseek-coder',
-      label: 'Deepseek-Coder V2 236B (OpenRouter)',
-      provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
-    },
-    {
-      name: 'google/gemini-flash-1.5',
-      label: 'Google Gemini Flash 1.5 (OpenRouter)',
-      provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
-    },
-    {
-      name: 'google/gemini-pro-1.5',
-      label: 'Google Gemini Pro 1.5 (OpenRouter)',
-      provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
-    },
-    { name: 'x-ai/grok-beta', label: 'xAI Grok Beta (OpenRouter)', provider: 'OpenRouter', maxTokenAllowed: 8000 },
-    {
-      name: 'mistralai/mistral-nemo',
-      label: 'OpenRouter Mistral Nemo (OpenRouter)',
-      provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
-    },
+
+    // GPT-4o via OpenRouter: 128k context
     {
-      name: 'qwen/qwen-110b-chat',
-      label: 'OpenRouter Qwen 110b Chat (OpenRouter)',
+      name: 'openai/gpt-4o',
+      label: 'GPT-4o',
       provider: 'OpenRouter',
-      maxTokenAllowed: 8000,
+      maxTokenAllowed: 128000,
     },
-    { name: 'cohere/command', label: 'Cohere Command (OpenRouter)', provider: 'OpenRouter', maxTokenAllowed: 4096 },
   ];
 
   async getDynamicModels(
@@ -89,12 +63,21 @@

     return data.data
       .sort((a, b) => a.name.localeCompare(b.name))
-      .map((m) => ({
-        name: m.id,
-        label: `${m.name} - in:$${(m.pricing.prompt * 1_000_000).toFixed(2)} out:$${(m.pricing.completion * 1_000_000).toFixed(2)} - context ${Math.floor(m.context_length / 1000)}k`,
-        provider: this.name,
-        maxTokenAllowed: 8000,
-      }));
+      .map((m) => {
+        // Get accurate context window from OpenRouter API
+        const contextWindow = m.context_length || 32000; // Use API value or fallback
+
+        // Cap at reasonable limits to prevent issues (OpenRouter has some very large models)
+        const maxAllowed = 1000000; // 1M tokens max for safety
+        const finalContext = Math.min(contextWindow, maxAllowed);
+
+        return {
+          name: m.id,
+          label: `${m.name} - in:$${(m.pricing.prompt * 1_000_000).toFixed(2)} out:$${(m.pricing.completion * 1_000_000).toFixed(2)} - context ${finalContext >= 1000000 ? Math.floor(finalContext / 1000000) + 'M' : Math.floor(finalContext / 1000) + 'k'}`,
+          provider: this.name,
+          maxTokenAllowed: finalContext,
+        };
+      });
   } catch (error) {
     console.error('Error getting OpenRouter models:', error);
     return [];
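The label string packs pricing and context into one expression; the context part is the same k/M rendering the Google provider uses. A sketch of just that formatting, with formatContext as an illustrative name not taken from the PR:

function formatContext(tokens: number): string {
  // >= 1M renders as whole millions ('1M'); everything below as thousands ('128k')
  return tokens >= 1000000 ? `${Math.floor(tokens / 1000000)}M` : `${Math.floor(tokens / 1000)}k`;
}

// With the 1M cap applied first:
// formatContext(Math.min(2000000, 1000000)) -> '1M'
// formatContext(200000)                     -> '200k'
// formatContext(16385)                      -> '16k'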
43 changes: 32 additions & 11 deletions app/lib/modules/llm/providers/openai.ts
@@ -13,11 +13,14 @@ export default class OpenAIProvider extends BaseProvider {
   };
 
   staticModels: ModelInfo[] = [
-    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 8000 },
-    { name: 'gpt-4o-mini', label: 'GPT-4o Mini', provider: 'OpenAI', maxTokenAllowed: 8000 },
-    { name: 'gpt-4-turbo', label: 'GPT-4 Turbo', provider: 'OpenAI', maxTokenAllowed: 8000 },
-    { name: 'gpt-4', label: 'GPT-4', provider: 'OpenAI', maxTokenAllowed: 8000 },
-    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'OpenAI', maxTokenAllowed: 8000 },
+    /*
+     * Essential fallback models - only the most stable/reliable ones
+     * GPT-4o: 128k context, high performance, recommended for most tasks
+     */
+    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000 },
+
+    // GPT-3.5-turbo: 16k context, fast and cost-effective
+    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'OpenAI', maxTokenAllowed: 16000 },
   ];
 
   async getDynamicModels(
@@ -53,12 +56,30 @@
         !staticModelIds.includes(model.id),
     );
 
-    return data.map((m: any) => ({
-      name: m.id,
-      label: `${m.id}`,
-      provider: this.name,
-      maxTokenAllowed: m.context_window || 32000,
-    }));
+    return data.map((m: any) => {
+      // Get accurate context window from OpenAI API
+      let contextWindow = 32000; // default fallback
+
+      // OpenAI provides context_length in their API response
+      if (m.context_length) {
+        contextWindow = m.context_length;
+      } else if (m.id?.includes('gpt-4o')) {
+        contextWindow = 128000; // GPT-4o has 128k context
+      } else if (m.id?.includes('gpt-4-turbo') || m.id?.includes('gpt-4-1106')) {
+        contextWindow = 128000; // GPT-4 Turbo has 128k context
+      } else if (m.id?.includes('gpt-4')) {
+        contextWindow = 8192; // Standard GPT-4 has 8k context
+      } else if (m.id?.includes('gpt-3.5-turbo')) {
+        contextWindow = 16385; // GPT-3.5-turbo has 16k context
+      }
+
+      return {
+        name: m.id,
+        label: `${m.id} (${Math.floor(contextWindow / 1000)}k context)`,
+        provider: this.name,
+        maxTokenAllowed: Math.min(contextWindow, 128000), // Cap at 128k for safety
+      };
+    });
   }
 
   getModelInstance(options: {