Skip to content

Commit 0a4586e

Browse files
committed
feat: add DEFAULT_MAX_TOKENS env variable for global token limit
1 parent: a857af6 · commit: 0a4586e

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

src/lib/server/config.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ type ExtraConfigKeys =
160160
| "MCP_SERVERS"
161161
| "MCP_FORWARD_HF_USER_TOKEN"
162162
| "MCP_TOOL_TIMEOUT_MS"
163-
| "EXA_API_KEY";
163+
| "EXA_API_KEY"
164+
| "DEFAULT_MAX_TOKENS";
164165

165166
type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };
166167

src/lib/server/endpoints/openai/endpointOai.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,14 @@ export async function endpointOai(
130130
});
131131

132132
const parameters = { ...model.parameters, ...generateSettings };
133+
const parsedMaxTokens = Number(config.DEFAULT_MAX_TOKENS);
134+
const defaultMaxTokens =
135+
Number.isInteger(parsedMaxTokens) && parsedMaxTokens > 0 ? parsedMaxTokens : undefined;
133136
const body: CompletionCreateParamsStreaming = {
134137
model: model.id ?? model.name,
135138
prompt,
136139
stream: true,
137-
max_tokens: parameters?.max_tokens,
140+
max_tokens: parameters?.max_tokens ?? defaultMaxTokens,
138141
stop: parameters?.stop,
139142
temperature: parameters?.temperature,
140143
top_p: parameters?.top_p,
@@ -195,14 +198,18 @@ export async function endpointOai(
195198

196199
// Combine model defaults with request-specific parameters
197200
const parameters = { ...model.parameters, ...generateSettings };
201+
const parsedMaxTokens = Number(config.DEFAULT_MAX_TOKENS);
202+
const defaultMaxTokens =
203+
Number.isInteger(parsedMaxTokens) && parsedMaxTokens > 0 ? parsedMaxTokens : undefined;
204+
const effectiveMaxTokens = parameters?.max_tokens ?? defaultMaxTokens;
198205
const body = {
199206
model: model.id ?? model.name,
200207
messages: messagesOpenAI,
201208
stream: streamingSupported,
202209
// Support two different ways of specifying token limits depending on the model
203210
...(useCompletionTokens
204-
? { max_completion_tokens: parameters?.max_tokens }
205-
: { max_tokens: parameters?.max_tokens }),
211+
? { max_completion_tokens: effectiveMaxTokens }
212+
: { max_tokens: effectiveMaxTokens }),
206213
stop: parameters?.stop,
207214
temperature: parameters?.temperature,
208215
top_p: parameters?.top_p,

0 commit comments

Comments (0)