Skip to content

Commit 7bfb94d

Browse files
committed
Add response stats to the chat
1 parent 7d8f412 commit 7bfb94d

File tree

1 file changed

+55
-2
lines changed

1 file changed

+55
-2
lines changed

llamafile/server/www/chatbot.js

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,44 @@ function wrapMessageElement(messageElement, role) {
6868
if (role == "assistant") {
6969
const controlContainer = document.createElement("div");
7070
controlContainer.appendChild(createCopyButton(() => messageElement.textContent, () => messageElement.innerHTML));
71+
controlContainer.appendChild(infoButton());
7172
controlContainer.classList.add("message-controls");
7273
wrapper.appendChild(controlContainer);
7374
}
7475
wrapper.classList.add("message-wrapper", role);
7576
return wrapper;
7677
}
7778

79+
/**
 * Creates or updates the "i" statistics button attached to an assistant message.
 *
 * When `container` already holds an element matching `#stats`, that element is
 * reused; otherwise a new <button> is created (and NOT attached — the caller
 * appends the returned element). The button is hidden while `stats` is missing
 * and, once stats are supplied, its tooltip (`title`) describes prompt
 * processing and response generation throughput.
 *
 * @param {?Element} [container] - Element searched for an existing `#stats` button.
 * @param {?Object} [stats] - Timing/token data for one response.
 * @param {number} stats.startTime - ms timestamp when the request started.
 * @param {number} stats.firstContentTime - ms timestamp of the first content chunk.
 * @param {number} stats.endTime - ms timestamp when the response finished.
 * @param {number} stats.promptTokenCount - Number of prompt tokens.
 * @param {number} stats.reponseTokenCount - Number of generated tokens. The
 *   misspelled key is intentional here: it is the name the stream handler in
 *   this file writes, so it must be read under the same spelling.
 * @returns {HTMLButtonElement} The (possibly newly created) stats button.
 */
function infoButton(container, stats) {
  let button = container?.querySelector("#stats");
  if (!button) {
    button = document.createElement("button");
    button.id = "stats";
    button.innerText = "i";
    button.style.fontFamily = "monospace";
  }

  // Keep the button invisible until there is something to show.
  button.style.display = stats ? "" : "none";

  if (stats) {
    // Shared formatter for both tooltip lines. The original duplicated this
    // logic and the response line accidentally printed the prompt duration
    // for sub-second responses.
    const describe = (label, tokenCount, durationMs) => {
      const tokensPerSecond = (tokenCount / (durationMs / 1000)).toFixed(2);
      const durationString = durationMs >= 1000
        ? `${(durationMs / 1000).toFixed(2)}s`
        : `${durationMs}ms`;
      return `${label} in ${durationString} (${tokensPerSecond} tokens/s)`;
    };

    const parts = [];
    const promptDurationMs = stats.firstContentTime - stats.startTime;
    const responseDurationMs = stats.endTime - stats.firstContentTime;

    if (promptDurationMs > 0 && stats.promptTokenCount > 0) {
      parts.push(describe(
        `Processed ${stats.promptTokenCount} input tokens`,
        stats.promptTokenCount,
        promptDurationMs,
      ));
    }

    if (responseDurationMs > 0 && stats.reponseTokenCount > 0) {
      parts.push(describe(
        `Generated ${stats.reponseTokenCount} tokens`,
        stats.reponseTokenCount,
        responseDurationMs,
      ));
    } else {
      // No usable generation data (e.g. the stream ended before usage arrived).
      parts.push("Incomplete");
    }

    button.title = parts.join("\n");
  }

  return button;
}
108+
78109
function createMessageElement(content) {
79110
const messageDiv = document.createElement("div");
80111
messageDiv.classList.add("message");
@@ -126,6 +157,13 @@ async function handleChatStream(response) {
126157
streamingMessageContent = [];
127158
const prefillStatus = document.getElementById('prefill-status');
128159
const progressBar = prefillStatus.querySelector('.progress-bar');
160+
const stats = {
161+
startTime: Date.now(), // Timestamp when the request started
162+
firstContentTime: null, // Timestamp when the first content was received
163+
endTime: null, // Timestamp when the response was fully received
164+
promptTokenCount: 0, // Number of tokens in the prompt
165+
reponseTokenCount: 0 // Number of tokens in the response
166+
};
129167

130168
try {
131169
while (true) {
@@ -154,7 +192,11 @@ async function handleChatStream(response) {
154192
prefillStatus.style.display = "flex";
155193
progressBar.style.width = `${parsed.x_prefill_progress * 100}%`;
156194
} else {
157-
prefillStatus.style.display = "none";
195+
if (content && !stats.firstContentTime) {
196+
// Finished parsing the prompt
197+
stats.firstContentTime = Date.now();
198+
prefillStatus.style.display = "none";
199+
}
158200
}
159201

160202
if (content && !messageAppended) {
@@ -171,6 +213,11 @@ async function handleChatStream(response) {
171213
high.feed(content);
172214
scrollToBottom();
173215
}
216+
if (parsed.usage) {
217+
stats.endTime = Date.now()
218+
stats.promptTokenCount = parsed.usage.prompt_tokens
219+
stats.reponseTokenCount = parsed.usage.completion_tokens
220+
}
174221
} catch (e) {
175222
console.error("Error parsing JSON:", e);
176223
}
@@ -186,6 +233,9 @@ async function handleChatStream(response) {
186233
}
187234
} finally {
188235
if (messageAppended) {
236+
stats.firstContentTime = stats.firstContentTime ?? Date.now();
237+
stats.endTime = stats.endTime ?? Date.now();
238+
infoButton(currentMessageWrapper, stats);
189239
high.flush();
190240
// we don't supply max_tokens, so "length" can
191241
// only mean that we ran out of context window
@@ -257,7 +307,10 @@ async function sendMessage() {
257307
top_p: settings.top_p,
258308
presence_penalty: settings.presence_penalty,
259309
frequency_penalty: settings.frequency_penalty,
260-
stream: true
310+
stream: true,
311+
stream_options: {
312+
include_usage: true
313+
}
261314
}),
262315
signal: abortController.signal
263316
});

0 commit comments

Comments
 (0)