Skip to content

Commit c8f13d5

Browse files
Enable custom HTTP response timeout for ollama (#4448)
1 parent ac444c8 commit c8f13d5

File tree

4 files changed

+47
-1
lines changed

4 files changed

+47
-1
lines changed

docker/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ GID='1000'
4444
# OLLAMA_MODEL_PREF='llama2'
4545
# OLLAMA_MODEL_TOKEN_LIMIT=4096
4646
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
47+
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude. Defaults to 5 minutes before aborting)
4748

4849
# LLM_PROVIDER='togetherai'
4950
# TOGETHER_AI_API_KEY='my-together-ai-key'

server/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
4141
# OLLAMA_MODEL_PREF='llama2'
4242
# OLLAMA_MODEL_TOKEN_LIMIT=4096
4343
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
44+
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude. Defaults to 5 minutes before aborting)
4445

4546
# LLM_PROVIDER='togetherai'
4647
# TOGETHER_AI_API_KEY='my-together-ai-key'

server/utils/AiProviders/ollama/index.js

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ class OllamaAILLM {
3131
const headers = this.authToken
3232
? { Authorization: `Bearer ${this.authToken}` }
3333
: {};
34-
this.client = new Ollama({ host: this.basePath, headers: headers });
34+
this.client = new Ollama({
35+
host: this.basePath,
36+
headers: headers,
37+
fetch: this.#applyFetch(),
38+
});
3539
this.embedder = embedder ?? new NativeEmbedder();
3640
this.defaultTemp = 0.7;
3741
this.#log(
@@ -55,6 +59,43 @@ class OllamaAILLM {
5559
);
5660
}
5761

62+
/**
63+
* Apply a custom fetch function to the Ollama client.
64+
* This is useful when we want to bypass the default 5m timeout for global fetch
65+
* for machines which run responses very slowly.
66+
* @returns {Function} The custom fetch function.
67+
*/
68+
#applyFetch() {
69+
try {
70+
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
71+
const { Agent } = require("undici");
72+
const moment = require("moment");
73+
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
74+
75+
if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
76+
this.#log(
77+
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
78+
{ timeout }
79+
);
80+
return fetch;
81+
} else timeout = Number(timeout);
82+
83+
const noTimeoutFetch = (input, init = {}) => {
84+
return fetch(input, {
85+
...init,
86+
dispatcher: new Agent({ headersTimeout: timeout }),
87+
});
88+
};
89+
90+
const humanDiff = moment.duration(timeout).humanize();
91+
this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`);
92+
return noTimeoutFetch;
93+
} catch (error) {
94+
this.#log("Error applying custom fetch - using default fetch", error);
95+
return fetch;
96+
}
97+
}
98+
5899
streamingEnabled() {
59100
return "streamGetChatCompletion" in this;
60101
}

server/utils/helpers/updateENV.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,9 @@ function dumpENV() {
11701170

11711171
// Specify Chromium args for collector
11721172
"ANYTHINGLLM_CHROMIUM_ARGS",
1173+
1174+
// Allow setting a custom response timeout for Ollama
1175+
"OLLAMA_RESPONSE_TIMEOUT",
11731176
];
11741177

11751178
// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.

0 commit comments

Comments
 (0)