Commit ff8b0d7

fix: maxCompletionTokens Implementation for All Providers (#1938)
* Update LLM providers and constants
  - Updated constants in app/lib/.server/llm/constants.ts
  - Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
  - Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
  - Modified GitHub provider in app/lib/modules/llm/providers/github.ts
  - Updated Google provider in app/lib/modules/llm/providers/google.ts
  - Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
  - Updated LLM types in app/lib/modules/llm/types.ts
  - Modified API route in app/routes/api.llmcall.ts

* Fix maxCompletionTokens Implementation for All Providers
  - Cohere: Added maxCompletionTokens: 4000 to all 10 static models
  - DeepSeek: Added maxCompletionTokens: 8192 to all 3 static models
  - Groq: Added maxCompletionTokens: 8192 to both static models
  - Mistral: Added maxCompletionTokens: 8192 to all 9 static models
  - Together: Added maxCompletionTokens: 8192 to both static models
  - Groq: Fixed getDynamicModels to include maxCompletionTokens: 8192
  - Together: Fixed getDynamicModels to include maxCompletionTokens: 8192
  - OpenAI: Fixed getDynamicModels with proper logic for reasoning models (o1: 16384, o1-mini: 8192) and standard models
1 parent 38c1349 commit ff8b0d7
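
For reference, every diff below adds the same optional field to a provider's model list. A minimal sketch of the resulting ModelInfo shape in app/lib/modules/llm/types.ts (that file's diff is not shown on this page, so everything beyond the five fields visible in the hunks below is an assumption):

// Sketch only: field names are taken from the hunks on this page; the actual
// interface in types.ts may declare additional members.
export interface ModelInfo {
  name: string;                  // provider-side model id, e.g. 'command-r-plus'
  label: string;                 // display label shown in the model picker
  provider: string;              // provider key, e.g. 'Cohere'
  maxTokenAllowed: number;       // context-window budget for the request
  maxCompletionTokens?: number;  // added by this commit: cap on generated tokens
}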

File tree

6 files changed: +169 additions, -23 deletions

app/lib/modules/llm/providers/cohere.ts

Lines changed: 58 additions & 10 deletions
@@ -13,16 +13,64 @@ export default class CohereProvider extends BaseProvider {
   };

   staticModels: ModelInfo[] = [
-    { name: 'command-r-plus-08-2024', label: 'Command R plus Latest', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-r-08-2024', label: 'Command R Latest', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-r-plus', label: 'Command R plus', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-nightly', label: 'Command Nightly', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-light', label: 'Command Light', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'command-light-nightly', label: 'Command Light Nightly', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'c4ai-aya-expanse-8b', label: 'c4AI Aya Expanse 8b', provider: 'Cohere', maxTokenAllowed: 4096 },
-    { name: 'c4ai-aya-expanse-32b', label: 'c4AI Aya Expanse 32b', provider: 'Cohere', maxTokenAllowed: 4096 },
+    {
+      name: 'command-r-plus-08-2024',
+      label: 'Command R plus Latest',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'command-r-08-2024',
+      label: 'Command R Latest',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'command-r-plus',
+      label: 'Command R plus',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    { name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 },
+    { name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 },
+    {
+      name: 'command-nightly',
+      label: 'Command Nightly',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'command-light',
+      label: 'Command Light',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'command-light-nightly',
+      label: 'Command Light Nightly',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'c4ai-aya-expanse-8b',
+      label: 'c4AI Aya Expanse 8b',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
+    {
+      name: 'c4ai-aya-expanse-32b',
+      label: 'c4AI Aya Expanse 32b',
+      provider: 'Cohere',
+      maxTokenAllowed: 4096,
+      maxCompletionTokens: 4000,
+    },
   ];

   getModelInstance(options: {

app/lib/modules/llm/providers/deepseek.ts

Lines changed: 21 additions & 3 deletions
@@ -13,9 +13,27 @@ export default class DeepseekProvider extends BaseProvider {
   };

   staticModels: ModelInfo[] = [
-    { name: 'deepseek-coder', label: 'Deepseek-Coder', provider: 'Deepseek', maxTokenAllowed: 8000 },
-    { name: 'deepseek-chat', label: 'Deepseek-Chat', provider: 'Deepseek', maxTokenAllowed: 8000 },
-    { name: 'deepseek-reasoner', label: 'Deepseek-Reasoner', provider: 'Deepseek', maxTokenAllowed: 8000 },
+    {
+      name: 'deepseek-coder',
+      label: 'Deepseek-Coder',
+      provider: 'Deepseek',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'deepseek-chat',
+      label: 'Deepseek-Chat',
+      provider: 'Deepseek',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'deepseek-reasoner',
+      label: 'Deepseek-Reasoner',
+      provider: 'Deepseek',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
   ];

   getModelInstance(options: {

app/lib/modules/llm/providers/groq.ts

Lines changed: 9 additions & 1 deletion
@@ -17,14 +17,21 @@ export default class GroqProvider extends BaseProvider {
    * Essential fallback models - only the most stable/reliable ones
    * Llama 3.1 8B: 128k context, fast and efficient
    */
-    { name: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B', provider: 'Groq', maxTokenAllowed: 128000 },
+    {
+      name: 'llama-3.1-8b-instant',
+      label: 'Llama 3.1 8B',
+      provider: 'Groq',
+      maxTokenAllowed: 128000,
+      maxCompletionTokens: 8192,
+    },

     // Llama 3.3 70B: 128k context, most capable model
     {
       name: 'llama-3.3-70b-versatile',
       label: 'Llama 3.3 70B',
       provider: 'Groq',
       maxTokenAllowed: 128000,
+      maxCompletionTokens: 8192,
     },
   ];

@@ -62,6 +69,7 @@ export default class GroqProvider extends BaseProvider {
       label: `${m.id} - context ${m.context_window ? Math.floor(m.context_window / 1000) + 'k' : 'N/A'} [ by ${m.owned_by}]`,
       provider: this.name,
       maxTokenAllowed: Math.min(m.context_window || 8192, 16384),
+      maxCompletionTokens: 8192,
     }));
   }

app/lib/modules/llm/providers/mistral.ts

Lines changed: 63 additions & 9 deletions
@@ -13,15 +13,69 @@ export default class MistralProvider extends BaseProvider {
   };

   staticModels: ModelInfo[] = [
-    { name: 'open-mistral-7b', label: 'Mistral 7B', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'open-mixtral-8x7b', label: 'Mistral 8x7B', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'open-mixtral-8x22b', label: 'Mistral 8x22B', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'open-codestral-mamba', label: 'Codestral Mamba', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'open-mistral-nemo', label: 'Mistral Nemo', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'ministral-8b-latest', label: 'Mistral 8B', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'mistral-small-latest', label: 'Mistral Small', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'codestral-latest', label: 'Codestral', provider: 'Mistral', maxTokenAllowed: 8000 },
-    { name: 'mistral-large-latest', label: 'Mistral Large Latest', provider: 'Mistral', maxTokenAllowed: 8000 },
+    {
+      name: 'open-mistral-7b',
+      label: 'Mistral 7B',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'open-mixtral-8x7b',
+      label: 'Mistral 8x7B',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'open-mixtral-8x22b',
+      label: 'Mistral 8x22B',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'open-codestral-mamba',
+      label: 'Codestral Mamba',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'open-mistral-nemo',
+      label: 'Mistral Nemo',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'ministral-8b-latest',
+      label: 'Mistral 8B',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'mistral-small-latest',
+      label: 'Mistral Small',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'codestral-latest',
+      label: 'Codestral',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
+    {
+      name: 'mistral-large-latest',
+      label: 'Mistral Large Latest',
+      provider: 'Mistral',
+      maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
+    },
   ];

   getModelInstance(options: {

app/lib/modules/llm/providers/openai.ts

Lines changed: 15 additions & 0 deletions
@@ -79,11 +79,26 @@ export default class OpenAIProvider extends BaseProvider {
         contextWindow = 16385; // GPT-3.5-turbo has 16k context
       }

+      // Determine completion token limits based on model type
+      let maxCompletionTokens = 16384; // default for most models
+
+      if (m.id?.startsWith('o1-preview') || m.id?.startsWith('o1-mini') || m.id?.startsWith('o1')) {
+        // Reasoning models have specific completion limits
+        maxCompletionTokens = m.id?.includes('mini') ? 8192 : 16384;
+      } else if (m.id?.includes('gpt-4o')) {
+        maxCompletionTokens = 16384;
+      } else if (m.id?.includes('gpt-4')) {
+        maxCompletionTokens = 8192;
+      } else if (m.id?.includes('gpt-3.5-turbo')) {
+        maxCompletionTokens = 4096;
+      }
+
       return {
         name: m.id,
         label: `${m.id} (${Math.floor(contextWindow / 1000)}k context)`,
         provider: this.name,
         maxTokenAllowed: Math.min(contextWindow, 128000), // Cap at 128k for safety
+        maxCompletionTokens,
       };
     });
   }

app/lib/modules/llm/providers/together.ts

Lines changed: 3 additions & 0 deletions
@@ -22,6 +22,7 @@ export default class TogetherProvider extends BaseProvider {
       label: 'Llama 3.2 90B Vision',
       provider: 'Together',
       maxTokenAllowed: 128000,
+      maxCompletionTokens: 8192,
     },

     // Mixtral 8x7B: 32k context, strong performance
@@ -30,6 +31,7 @@ export default class TogetherProvider extends BaseProvider {
       label: 'Mixtral 8x7B Instruct',
       provider: 'Together',
       maxTokenAllowed: 32000,
+      maxCompletionTokens: 8192,
     },
   ];

@@ -67,6 +69,7 @@ export default class TogetherProvider extends BaseProvider {
       label: `${m.display_name} - in:$${m.pricing.input.toFixed(2)} out:$${m.pricing.output.toFixed(2)} - context ${Math.floor(m.context_length / 1000)}k`,
       provider: this.name,
       maxTokenAllowed: 8000,
+      maxCompletionTokens: 8192,
     }));
   }
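
The commit message also lists app/lib/.server/llm/stream-text.ts and app/routes/api.llmcall.ts, whose diffs are not shown in this view. As a sketch only of how a caller such as stream-text.ts might consume the new field (the helper name and the 8192 fallback are assumptions, not code from this commit):

// Hypothetical helper: clamps the requested completion budget to a model's
// cap. Only the maxCompletionTokens field itself comes from this commit; the
// import path assumes the app's usual '~' alias.
import type { ModelInfo } from '~/lib/modules/llm/types';

const FALLBACK_COMPLETION_TOKENS = 8192; // assumed default for models without a cap

export function resolveCompletionBudget(model: ModelInfo, requested?: number): number {
  const cap = model.maxCompletionTokens ?? FALLBACK_COMPLETION_TOKENS;
  return Math.min(requested ?? cap, cap); // never exceed the per-model cap
}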
