server/utils/AiProviders/ollama/index.js (4 changes: 1 addition, 3 deletions)

@@ -352,6 +352,7 @@ class OllamaAILLM {
         if (chunk.done) {
           usage.prompt_tokens = chunk.prompt_eval_count;
           usage.completion_tokens = chunk.eval_count;
+          usage.eval_duration = chunk.eval_duration / 1e9;
           writeResponseChunk(response, {
             uuid,
             sources,
@@ -362,9 +363,6 @@
           });
           response.removeListener("close", handleAbort);
           stream?.endMeasurement(usage);
-          stream.metrics.eval_duration = chunk.eval_duration / 1e9;
-          stream.metrics.outputTps =
-            stream.metrics.completion_tokens / stream.metrics.eval_duration;
           resolve(fullText);
           break;
         }
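For context: Ollama reports eval_duration in nanoseconds on the final stream chunk, so the provider now converts it to seconds (/ 1e9) and attaches it to the usage object before calling endMeasurement, leaving the TPS math to the monitor. A minimal sketch of that hand-off, assuming only the chunk fields used in the diff above (the wrapper function itself is hypothetical):

    // Sketch: final-chunk handling per the diff above (simplified, hypothetical wrapper).
    function handleFinalChunk(chunk, stream, usage) {
      if (!chunk.done) return;
      usage.prompt_tokens = chunk.prompt_eval_count; // tokens in the prompt
      usage.completion_tokens = chunk.eval_count; // tokens generated
      // Ollama reports eval_duration in nanoseconds; convert to seconds
      // so the monitor can compute tokens-per-second directly.
      usage.eval_duration = chunk.eval_duration / 1e9;
      stream?.endMeasurement(usage);
    }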
server/utils/helpers/chat/LLMPerformanceMonitor.js (8 changes: 7 additions, 1 deletion)

@@ -13,6 +13,7 @@ const { TokenManager } = require("../tiktoken");
  * @property {number} total_tokens - the total number of tokens
  * @property {number} outputTps - the tokens per second of the output
  * @property {number} duration - the duration of the stream
+ * @property {number} [eval_duration] - optional eval duration from providers (e.g., Ollama) used for a more accurate outputTps calculation
  */

 /**
@@ -88,7 +89,12 @@

       stream.metrics.total_tokens =
         stream.metrics.prompt_tokens + (stream.metrics.completion_tokens || 0);
-      stream.metrics.outputTps = stream.metrics.completion_tokens / duration;
+
+      // Use eval_duration if provided (for providers like Ollama that report it),
+      // otherwise fall back to total request duration.
+      stream.metrics.outputTps =
+        stream.metrics.completion_tokens /
+        (stream.metrics.eval_duration || duration);
       stream.metrics.duration = duration;
       return stream.metrics;
     };
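The net effect: when a provider reports eval_duration, outputTps reflects pure generation time rather than total request time (which also includes prompt evaluation and transport overhead), while providers that report nothing keep the old behavior through the fallback. A small sketch of that fallback with illustrative numbers (hypothetical values, not taken from the PR):

    // Sketch of the outputTps fallback introduced above.
    function computeOutputTps(metrics, duration) {
      // Prefer provider-reported generation time (in seconds); otherwise
      // fall back to the monitor's measured request duration.
      return metrics.completion_tokens / (metrics.eval_duration || duration);
    }

    // 128 tokens, 1.6s of eval time inside a 2.4s request (made-up values):
    computeOutputTps({ completion_tokens: 128, eval_duration: 1.6 }, 2.4); // 80 tok/s
    computeOutputTps({ completion_tokens: 128 }, 2.4); // ~53.3 tok/s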