Skip to content

Commit 362f743

Browse files
committed
feat: Displays slot usage details in the UI above the Chat Form
1 parent 8cf3cff commit 362f743

File tree

9 files changed

+137
-14
lines changed

9 files changed

+137
-14
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
<script lang="ts">
	import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
	import { isLoading } from '$lib/stores/chat.svelte';

	// Component-local handle used to start/stop slot monitoring and read its details.
	const processingState = useProcessingState();

	// The bar is shown only while a chat response is being generated.
	let showSlotsInfo = $derived(isLoading());

	// Detail strings rendered as chips in the bar.
	let processingDetails = $derived(processingState.getProcessingDetails());

	// Single owner of the monitoring lifecycle: monitoring runs exactly while
	// `isLoading()` is true. The returned teardown runs before the effect re-runs
	// (loading -> idle) and when the component is destroyed, so one code path
	// covers both "generation finished" and "component unmounted". This replaces
	// the previous onMount + $effect pair, which started monitoring on mount and
	// then either started it a second time or stopped it straight away.
	$effect(() => {
		if (!isLoading()) {
			return;
		}

		processingState.startMonitoring();

		return () => {
			processingState.stopMonitoring();
		};
	});
</script>
28+
29+
<!--
	Slot usage bar. It stays in the DOM and is faded/slid in through the
	`visible` class; `aria-hidden` keeps assistive technology in sync with that
	visual state while the bar is faded out.
-->
<div
	class="slots-info-container"
	class:visible={showSlotsInfo}
	aria-hidden={!showSlotsInfo}
>
	<div class="slots-info-content">
		<!--
			Keyed by position rather than by the text itself: the text changes as
			values update, so a text key would recreate the span on every change
			instead of patching it, and two identical strings would produce
			duplicate keys.
		-->
		{#each processingDetails as detail, index (index)}
			<span class="slots-info-detail">{detail}</span>
		{/each}
	</div>
</div>
39+
40+
<style>
	/* Bar that slides/fades in when the `visible` class is applied. */
	.slots-info-container {
		position: sticky;
		top: 0;
		z-index: 10;
		/* Opaque fallback; upgraded to a translucent variant below where supported. */
		background: var(--background);
		backdrop-filter: blur(8px);
		padding: 0.75rem 1rem;
		margin-bottom: 1rem;
		opacity: 0;
		transform: translateY(-100%);
		transition: opacity 300ms ease-out, transform 300ms ease-out;
		/* While hidden the bar is invisible and translated upwards; do not let it swallow clicks. */
		pointer-events: none;
	}

	/*
	 * 95% opaque background derived from --background itself. The previous
	 * `rgba(var(--background-rgb), 0.95)` declaration overrode the plain
	 * `var(--background)` one unconditionally and computes to a transparent
	 * background whenever --background-rgb is not defined, so the intended
	 * fallback never applied.
	 */
	@supports (background: color-mix(in srgb, red 95%, transparent)) {
		.slots-info-container {
			background: color-mix(in srgb, var(--background) 95%, transparent);
		}
	}

	.slots-info-container.visible {
		opacity: 1;
		transform: translateY(0);
		pointer-events: auto;
	}

	/* Centered, wrapping row of detail chips. */
	.slots-info-content {
		display: flex;
		flex-wrap: wrap;
		align-items: center;
		gap: 1rem;
		justify-content: center;
		max-width: 48rem;
		margin: 0 auto;
	}

	/* A single monospace chip. */
	.slots-info-detail {
		color: var(--muted-foreground);
		font-size: 0.75rem;
		padding: 0.25rem 0.75rem;
		background: var(--muted);
		border-radius: 0.375rem;
		font-family: ui-monospace, SFMono-Regular, "SF Mono", Consolas, "Liberation Mono", Menlo, monospace;
		white-space: nowrap;
	}

	/* Tighter spacing and smaller chips on narrow viewports. */
	@media (max-width: 768px) {
		.slots-info-content {
			gap: 0.5rem;
		}

		.slots-info-detail {
			font-size: 0.7rem;
			padding: 0.2rem 0.5rem;
		}
	}
</style>

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -211,20 +211,12 @@
211211
/>
212212
{/if}
213213

214-
{#if message?.role === 'assistant' && !message.content && isLoading()}
214+
{#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()}
215215
<div class="w-full max-w-[48rem] mt-6" in:fade>
216216
<div class="processing-container">
217217
<span class="processing-text">
218218
{processingState.getProcessingMessage()}
219219
</span>
220-
221-
{#if processingState.shouldShowDetails()}
222-
<div class="processing-details">
223-
{#each processingState.getProcessingDetails() as detail}
224-
<span class="processing-detail">{detail}</span>
225-
{/each}
226-
</div>
227-
{/if}
228220
</div>
229221
</div>
230222
{/if}

tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import { isFileTypeSupported } from '$lib/constants/supported-file-types';
66
import { filterFilesByModalities } from '$lib/utils/modality-file-validation';
77
import { supportsVision, supportsAudio, serverError, serverLoading } from '$lib/stores/server.svelte';
8-
import { ChatForm, ChatScreenHeader, ChatMessages, ServerInfo, ServerErrorSplash, ServerLoadingSplash } from '$lib/components/app';
8+
import { ChatForm, ChatScreenHeader, ChatMessages, ServerInfo, ServerErrorSplash, ServerLoadingSplash, SlotsInfo } from '$lib/components/app';
99
import {
1010
activeMessages,
1111
activeConversation,
@@ -23,7 +23,6 @@
2323
import { deleteConversation } from '$lib/stores/chat.svelte';
2424
import { goto } from '$app/navigation';
2525
26-
2726
let { showCenteredEmpty = false } = $props();
2827
let chatScrollContainer: HTMLDivElement | undefined = $state();
2928
let scrollInterval: ReturnType<typeof setInterval> | undefined;
@@ -251,6 +250,8 @@
251250
<ChatMessages class="mb-16 md:mb-24" messages={activeMessages()} />
252251

253252
<div class="sticky bottom-0 left-0 right-0 mt-auto" in:slide={{ duration: 150, axis: 'y' }}>
253+
<SlotsInfo />
254+
254255
<div class="conversation-chat-form rounded-t-3xl pb-4">
255256
<ChatForm
256257
isLoading={isLoading()}

tools/server/webui/src/lib/components/app/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ export { default as ServerStatus } from './ServerStatus.svelte';
3434
export { default as ServerErrorSplash } from './ServerErrorSplash.svelte';
3535
export { default as ServerLoadingSplash } from './ServerLoadingSplash.svelte';
3636
export { default as ServerInfo } from './ServerInfo.svelte';
37+
export { default as SlotsInfo } from './SlotsInfo.svelte';

tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { slotsService } from '$lib/services/slots';
22
import type { ApiProcessingState } from '$lib/types/api';
3+
import { config } from '$lib/stores/settings.svelte';
34

45
export function useProcessingState() {
56
let processingState = $state<ApiProcessingState | null>(null);
@@ -63,11 +64,16 @@ export function useProcessingState() {
6364
}
6465

6566
const details: string[] = [];
67+
const currentConfig = config(); // Get fresh config each time
6668

6769
if (processingState.contextUsed > 0) {
6870
const contextPercent = Math.round((processingState.contextUsed / processingState.contextTotal) * 100);
6971
details.push(`Context: ${processingState.contextUsed}/${processingState.contextTotal} (${contextPercent}%)`);
7072
}
73+
74+
if (currentConfig.showTokensPerSecond && processingState.tokensPerSecond && processingState.tokensPerSecond > 0) {
75+
details.push(`${processingState.tokensPerSecond.toFixed(1)} tokens/sec`);
76+
}
7177

7278
if (processingState.temperature !== 0.8) {
7379
details.push(`Temperature: ${processingState.temperature.toFixed(1)}`);

tools/server/webui/src/lib/services/chat.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export class ChatService {
3131
repeat_last_n, repeat_penalty, presence_penalty, frequency_penalty,
3232
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
3333
// Other parameters
34-
samplers, custom
34+
samplers, custom, timings_per_token
3535
} = options;
3636

3737
// Cancel any ongoing request and create a new abort controller
@@ -78,6 +78,9 @@ export class ChatService {
7878
requestBody.samplers = typeof samplers === 'string' ? samplers.split(';').filter((s: string) => s.trim()) : samplers;
7979
}
8080

81+
// Add timing parameters if provided
82+
if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
83+
8184
// Add custom parameters if provided
8285
if (custom) {
8386
try {

tools/server/webui/src/lib/services/slots.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ export class SlotsService {
77
private callbacks: Set<(state: ApiProcessingState) => void> = new Set();
88
private slotsAvailable: boolean | null = null;
99
private slotsEndpointSupported: boolean | null = null;
10+
private lastTokenCount: number = 0;
11+
private lastTimestamp: number = 0;
1012

1113
constructor(pollingInterval = 500) {
1214
this.pollingInterval = pollingInterval;
@@ -141,7 +143,8 @@ export class SlotsService {
141143
temperature: 0.8,
142144
topP: 0.95,
143145
speculative: false,
144-
hasNextToken: false
146+
hasNextToken: false,
147+
tokensPerSecond: 0
145148
};
146149
}
147150

@@ -159,6 +162,29 @@ export class SlotsService {
159162
const promptTokens = Math.floor(activeSlot.prompt.length / 4); // Rough estimate
160163
const contextUsed = promptTokens + activeSlot.next_token.n_decoded;
161164

165+
// Calculate tokens per second
166+
let tokensPerSecond = 0;
167+
const currentTime = Date.now();
168+
const currentTokens = activeSlot.next_token.n_decoded;
169+
170+
if (status === 'generating' && this.lastTimestamp > 0 && currentTokens > this.lastTokenCount) {
171+
const timeDiff = (currentTime - this.lastTimestamp) / 1000; // Convert to seconds
172+
const tokenDiff = currentTokens - this.lastTokenCount;
173+
if (timeDiff > 0) {
174+
tokensPerSecond = tokenDiff / timeDiff;
175+
}
176+
}
177+
178+
// Update tracking for next calculation
179+
if (status === 'generating') {
180+
this.lastTokenCount = currentTokens;
181+
this.lastTimestamp = currentTime;
182+
} else if (status === 'idle') {
183+
// Reset when idle
184+
this.lastTokenCount = 0;
185+
this.lastTimestamp = 0;
186+
}
187+
162188
return {
163189
status,
164190
tokensDecoded: activeSlot.next_token.n_decoded,
@@ -168,7 +194,8 @@ export class SlotsService {
168194
temperature: activeSlot.params.temperature,
169195
topP: activeSlot.params.top_p,
170196
speculative: activeSlot.speculative,
171-
hasNextToken: activeSlot.next_token.has_next_token
197+
hasNextToken: activeSlot.next_token.has_next_token,
198+
tokensPerSecond
172199
};
173200
}
174201

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ class ChatStore {
160160
// Generation parameters
161161
temperature: Number(currentConfig.temperature) || 0.8,
162162
max_tokens: Number(currentConfig.max_tokens) || 2048,
163+
// Timing parameters
164+
timings_per_token: currentConfig.showTokensPerSecond || false,
163165
// Sampling parameters
164166
dynatemp_range: Number(currentConfig.dynatemp_range) || 0.0,
165167
dynatemp_exponent: Number(currentConfig.dynatemp_exponent) || 1.0,

tools/server/webui/src/lib/types/api.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,4 +229,5 @@ export interface ApiProcessingState {
229229
topP: number;
230230
speculative: boolean;
231231
hasNextToken: boolean;
232+
tokensPerSecond?: number;
232233
}

0 commit comments

Comments
 (0)