Skip to content

Commit dd09094

Browse files
committed
add tok/s info
1 parent 95e294b commit dd09094

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

examples/server/webui/index.html

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -121,22 +121,24 @@ <h2 class="font-bold ml-4">Conversations</h2>
121121
</div>
122122
<div v-for="msg in messages" class="group">
123123
<message-bubble
124+
:config="config"
124125
:msg="msg"
125126
:key="msg.id"
126127
:is-generating="isGenerating"
127128
:edit-user-msg-and-regenerate="editUserMsgAndRegenerate"
128129
:regenerate-msg="regenerateMsg"></message-bubble>
129130
</div>
130131

131-
132-
133132
<!-- pending (ongoing) assistant message -->
134-
<div id="pending-msg" class="chat chat-start">
135-
<div v-if="pendingMsg" class="chat-bubble markdown chat-bubble-base-300">
136-
<span v-if="!pendingMsg.content" class="loading loading-dots loading-md"></span>
137-
<vue-markdown v-else :source="pendingMsg.content" />
138-
</div>
139-
</div>
133+
<div id="pending-msg"></div>
134+
<message-bubble
135+
v-if="pendingMsg"
136+
:config="config"
137+
:msg="pendingMsg"
138+
:key="pendingMsg.id"
139+
:is-generating="isGenerating"
140+
:edit-user-msg-and-regenerate="() => {}"
141+
:regenerate-msg="() => {}"></message-bubble>
140142
</div>
141143

142144
<!-- chat input -->
@@ -199,7 +201,7 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
199201
<summary class="collapse-title font-bold">Advanced config</summary>
200202
<div class="collapse-content">
201203
<div class="flex flex-row items-center mb-2">
202-
<input type="checkbox" class="checkbox" v-model="config.show_tokens_per_second" />
204+
<input type="checkbox" class="checkbox" v-model="config.showTokensPerSecond" />
203205
<span class="ml-4">Show tokens per second</span>
204206
</div>
205207
<label class="form-control mb-2">
@@ -243,12 +245,30 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
243245
<button class="btn btn-ghost mt-2 mr-2" @click="editingContent = null">Cancel</button>
244246
<button class="btn mt-2" @click="editMsg()">Submit</button>
245247
</template>
246-
<!-- render message as markdown -->
247-
<vue-markdown v-else :source="msg.content" />
248+
<template v-else>
249+
<!-- show loading dots for pending message -->
250+
<span v-if="msg.content === null" class="loading loading-dots loading-md"></span>
251+
<!-- render message as markdown -->
252+
<vue-markdown v-else :source="msg.content"></vue-markdown>
253+
<!-- render timings if enabled -->
254+
<div class="dropdown dropdown-hover dropdown-top mt-2" v-if="timings && config.showTokensPerSecond">
255+
<div tabindex="0" role="button" class="cursor-pointer font-semibold text-sm opacity-70">Speed: {{ timings.predicted_per_second.toFixed(1) }} t/s</div>
256+
<div class="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
257+
<b>Prompt</b><br/>
258+
- Tokens: {{ timings.prompt_n }}<br/>
259+
- Time: {{ timings.prompt_ms }} ms<br/>
260+
- Speed: {{ timings.prompt_per_second.toFixed(1) }} t/s<br/>
261+
<b>Generation</b><br/>
262+
- Tokens: {{ timings.predicted_n }}<br/>
263+
- Time: {{ timings.predicted_ms }} ms<br/>
264+
- Speed: {{ timings.predicted_per_second.toFixed(1) }} t/s<br/>
265+
</div>
266+
</div>
267+
</template>
248268
</div>
249269
</div>
250270
<!-- actions for each message -->
251-
<div :class="{'text-right': msg.role === 'user'}" class="mx-4 mt-2 mb-2">
271+
<div :class="{'text-right': msg.role === 'user', 'opacity-0': isGenerating}" class="mx-4 mt-2 mb-2">
252272
<!-- user message -->
253273
<button v-if="msg.role === 'user'" class="badge btn-mini show-on-hover" @click="editingContent = msg.content" :disabled="isGenerating">
254274
✍️ Edit

examples/server/webui/src/main.js

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const CONFIG_DEFAULT = {
1919
// Note: to avoid introducing breaking changes, keep the same data type (number, string, etc.) when changing a default value. Do not use null or undefined as a default value.
2020
apiKey: '',
2121
systemMessage: 'You are a helpful assistant.',
22+
showTokensPerSecond: false,
2223
// make sure these default values are in sync with `common.h`
2324
samplers: 'dkypmxt',
2425
temperature: 0.8,
@@ -39,7 +40,6 @@ const CONFIG_DEFAULT = {
3940
dry_allowed_length: 2,
4041
dry_penalty_last_n: -1,
4142
max_tokens: -1,
42-
show_tokens_per_second: false,
4343
custom: '', // custom json-stringified object
4444
};
4545
const CONFIG_INFO = {
@@ -112,6 +112,7 @@ const MessageBubble = defineComponent({
112112
},
113113
template: document.getElementById('message-bubble').innerHTML,
114114
props: {
115+
config: Object,
115116
msg: Object,
116117
isGenerating: Boolean,
117118
editUserMsgAndRegenerate: Function,
@@ -122,6 +123,16 @@ const MessageBubble = defineComponent({
122123
editingContent: null,
123124
};
124125
},
126+
computed: {
127+
timings() {
128+
if (!this.msg.timings) return null;
129+
return {
130+
...this.msg.timings,
131+
prompt_per_second: this.msg.timings.prompt_n / (this.msg.timings.prompt_ms / 1000),
132+
predicted_per_second: this.msg.timings.predicted_n / (this.msg.timings.predicted_ms / 1000),
133+
};
134+
}
135+
},
125136
methods: {
126137
copyMsg() {
127138
copyStr(this.msg.content);
@@ -381,7 +392,7 @@ const mainApp = createApp({
381392
dry_allowed_length: this.config.dry_allowed_length,
382393
dry_penalty_last_n: this.config.dry_penalty_last_n,
383394
max_tokens: this.config.max_tokens,
384-
timings_per_token: !!this.config.show_tokens_per_second,
395+
timings_per_token: !!this.config.showTokensPerSecond,
385396
...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
386397
};
387398
const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
@@ -404,6 +415,16 @@ const mainApp = createApp({
404415
content: lastContent + addedContent,
405416
};
406417
}
418+
const timings = chunk.timings;
419+
if (timings) {
420+
// only extract what's really needed, to save some space
421+
this.pendingMsg.timings = {
422+
prompt_n: timings.prompt_n,
423+
prompt_ms: timings.prompt_ms,
424+
predicted_n: timings.predicted_n,
425+
predicted_ms: timings.predicted_ms,
426+
};
427+
}
407428
}
408429

409430
StorageUtils.appendMsg(currConvId, this.pendingMsg);

0 commit comments

Comments
 (0)