Skip to content

Commit 8699fd9

Browse files
committed
feat: Improves context handling and error reporting (WIP)
Refactors context management by utilizing real-time slot data for accurate context limit checks, replacing token estimation with server-side slot information. Introduces a dedicated context service and enhances error reporting to provide more informative messages when context limits are exceeded, including the removal of both user and assistant messages upon context errors. Additionally, it tweaks the chat form textarea and message UI for better visual consistency.
1 parent 6c99820 commit 8699fd9

File tree

10 files changed

+285
-218
lines changed

10 files changed

+285
-218
lines changed

tools/server/webui/src/lib/components/app/MaximumContextAlertDialog.svelte

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@
3131
</span>
3232
</div>
3333

34-
<div>
35-
Maximum allowed:
36-
<span class="font-mono">
37-
{maxContextError()?.maxAllowed.toLocaleString()}
38-
</span>
39-
</div>
40-
4134
<div>
4235
Context window:
4336
<span class="font-mono">

tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
4040
export function resetHeight() {
4141
if (textareaElement) {
42-
textareaElement.style.height = 'auto';
42+
textareaElement.style.height = '1rem';
4343
}
4444
}
4545
</script>
@@ -48,7 +48,7 @@
4848
<textarea
4949
bind:this={textareaElement}
5050
bind:value
51-
class="placeholder:text-muted-foreground text-md max-h-32 min-h-[24px] w-full resize-none border-0 bg-transparent p-0 leading-6 outline-none focus-visible:ring-0 focus-visible:ring-offset-0"
51+
class="placeholder:text-muted-foreground text-md max-h-32 min-h-12 w-full resize-none border-0 bg-transparent p-0 leading-6 outline-none focus-visible:ring-0 focus-visible:ring-offset-0"
5252
onkeydown={onKeydown}
5353
oninput={(event) => autoResizeTextarea(event.currentTarget)}
5454
onpaste={onPaste}

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -195,18 +195,20 @@
195195

196196
{#snippet messageActions(config?: { role: ChatRole })}
197197
{#if config?.role === 'assistant' && !message.content && isLoading()}
198-
<div class="mx-auto w-full max-w-[48rem] mb-16" in:fade>
199-
<span class="processing-text">
200-
{processingState.getProcessingMessage()}
201-
</span>
202-
203-
{#if processingState.shouldShowDetails()}
204-
<div class="processing-details">
205-
{#each processingState.getProcessingDetails() as detail}
206-
<span class="processing-detail">{detail}</span>
207-
{/each}
208-
</div>
209-
{/if}
198+
<div class="w-full max-w-[48rem] mb-24" in:fade>
199+
<div class="processing-container">
200+
<span class="processing-text">
201+
{processingState.getProcessingMessage()}
202+
</span>
203+
204+
{#if processingState.shouldShowDetails()}
205+
<div class="processing-details">
206+
{#each processingState.getProcessingDetails() as detail}
207+
<span class="processing-detail">{detail}</span>
208+
{/each}
209+
</div>
210+
{/if}
211+
</div>
210212
</div>
211213
{/if}
212214

@@ -256,21 +258,30 @@
256258
{/if}
257259
</div>
258260

259-
<div
260-
class="{config?.role === 'user'
261-
? 'right-0'
262-
: 'left-0'} text-muted-foreground absolute text-xs transition-all duration-150 group-hover:pointer-events-none group-hover:opacity-0"
263-
>
264-
{message.timestamp
265-
? new Date(message.timestamp).toLocaleTimeString(undefined, {
266-
hour: '2-digit',
267-
minute: '2-digit'
268-
})
269-
: ''}
270-
</div>
261+
{#if messageContent.trim().length > 0}
262+
<div
263+
class="{config?.role === 'user'
264+
? 'right-0'
265+
: 'left-0'} text-muted-foreground absolute text-xs transition-all duration-150 group-hover:pointer-events-none group-hover:opacity-0"
266+
>
267+
{message.timestamp
268+
? new Date(message.timestamp).toLocaleTimeString(undefined, {
269+
hour: '2-digit',
270+
minute: '2-digit'
271+
})
272+
: ''}
273+
</div>
274+
{/if}
271275
{/snippet}
272276

273277
<style>
278+
.processing-container {
279+
display: flex;
280+
flex-direction: column;
281+
align-items: flex-start;
282+
gap: 0.5rem;
283+
}
284+
274285
.processing-text {
275286
background: linear-gradient(90deg, var(--muted-foreground), var(--foreground), var(--muted-foreground));
276287
background-size: 200% 100%;
@@ -285,18 +296,20 @@
285296
.processing-details {
286297
display: flex;
287298
flex-wrap: wrap;
288-
justify-content: center;
289-
gap: 0.75rem;
290-
margin-top: 0.25rem;
299+
align-items: center;
300+
gap: 0.5rem;
301+
margin-top: 0;
291302
}
292303
293304
.processing-detail {
294305
color: var(--muted-foreground);
295306
font-size: 0.75rem;
296-
padding: 0.125rem 0.5rem;
307+
padding: 0.25rem 0.5rem;
297308
background: var(--muted);
298-
border-radius: 0.375rem;
309+
border-radius: 0.5rem;
299310
font-family: ui-monospace, SFMono-Regular, "SF Mono", Consolas, "Liberation Mono", Menlo, monospace;
311+
white-space: nowrap;
312+
line-height: 1.2;
300313
}
301314
302315
@keyframes shine {

tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import { serverStore } from '$lib/stores/server.svelte';
55
import { isFileTypeSupported } from '$lib/constants/supported-file-types';
66
import { filterFilesByModalities, generateModalityErrorMessage } from '$lib/utils/modality-file-validation';
7+
import { supportsVision, supportsAudio } from '$lib/stores/server.svelte';
78
import { ChatForm, ChatScreenHeader, ChatMessages, ServerInfo } from '$lib/components/app';
89
import {
910
activeMessages,
@@ -13,7 +14,7 @@
1314
stopGeneration,
1415
setMaxContextError
1516
} from '$lib/stores/chat.svelte';
16-
import { wouldExceedContextLength } from '$lib/utils/token-estimation';
17+
import { contextService } from '$lib/services/context';
1718
import { fade, fly, slide } from 'svelte/transition';
1819
import { AUTO_SCROLL_THRESHOLD } from '$lib/constants/auto-scroll';
1920
import { navigating } from '$app/state';
@@ -102,28 +103,21 @@
102103
files?: ChatUploadedFile[]
103104
): Promise<boolean> {
104105
const extras = files ? await parseFilesToMessageExtras(files) : undefined;
105-
const maxContextLength = serverStore.serverProps?.default_generation_settings.n_ctx;
106-
107-
if (maxContextLength) {
108-
const contextCheck = wouldExceedContextLength(
109-
activeMessages(),
110-
message,
111-
extras,
112-
maxContextLength
113-
);
114-
115-
if (contextCheck.wouldExceed) {
116-
const errorMessage = `Message too long for context window. Estimated tokens: ${contextCheck.estimatedTokens.toLocaleString()}, Maximum allowed: ${contextCheck.maxAllowed.toLocaleString()} (Context: ${maxContextLength.toLocaleString()})`;
117-
118-
setMaxContextError({
119-
message: errorMessage,
120-
estimatedTokens: contextCheck.estimatedTokens,
121-
maxAllowed: contextCheck.maxAllowed,
122-
maxContext: maxContextLength
123-
});
124-
125-
return false;
126-
}
106+
107+
// Check context limit using real-time slots data
108+
const contextCheck = await contextService.checkContextLimit();
109+
110+
if (contextCheck && contextCheck.wouldExceed) {
111+
const errorMessage = contextService.getContextErrorMessage(contextCheck);
112+
113+
setMaxContextError({
114+
message: errorMessage,
115+
estimatedTokens: contextCheck.currentUsage,
116+
maxAllowed: contextCheck.availableTokens,
117+
maxContext: contextCheck.maxContext
118+
});
119+
120+
return false;
127121
}
128122
129123
await sendMessage(message, extras);

tools/server/webui/src/lib/services/chat.ts

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ export class ChatService {
147147
let thinkContent = '';
148148
let regularContent = '';
149149
let insideThinkTag = false;
150+
let hasReceivedData = false;
150151

151152
try {
152153
while (true) {
@@ -160,6 +161,14 @@ export class ChatService {
160161
if (line.startsWith('data: ')) {
161162
const data = line.slice(6);
162163
if (data === '[DONE]') {
164+
// Check if we received any actual content
165+
if (!hasReceivedData && fullResponse.length === 0) {
166+
// Empty response - likely a context error
167+
const contextError = new Error('The request exceeds the available context size. Try increasing the context size or enable context shift.');
168+
contextError.name = 'ContextError';
169+
onError?.(contextError);
170+
return;
171+
}
163172
onComplete?.(fullResponse);
164173
return;
165174
}
@@ -168,6 +177,7 @@ export class ChatService {
168177
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
169178
const content = parsed.choices[0]?.delta?.content;
170179
if (content) {
180+
hasReceivedData = true;
171181
fullResponse += content;
172182

173183
// Process content character by character to handle think tags
@@ -190,6 +200,14 @@ export class ChatService {
190200
}
191201
}
192202
}
203+
204+
// If we reach here without receiving [DONE] and no data, it's likely a context error
205+
if (!hasReceivedData && fullResponse.length === 0) {
206+
const contextError = new Error('The request exceeds the available context size. Try increasing the context size or enable context shift.');
207+
contextError.name = 'ContextError';
208+
onError?.(contextError);
209+
return;
210+
}
193211
} catch (error) {
194212
const err = error instanceof Error ? error : new Error('Stream error');
195213

@@ -217,13 +235,36 @@ export class ChatService {
217235
onError?: (error: Error) => void
218236
): Promise<string> {
219237
try {
220-
const data: ApiChatCompletionResponse = await response.json();
238+
// Check if response body is empty
239+
const responseText = await response.text();
240+
if (!responseText.trim()) {
241+
// Empty response - likely a context error
242+
const contextError = new Error('The request exceeds the available context size. Try increasing the context size or enable context shift.');
243+
contextError.name = 'ContextError';
244+
onError?.(contextError);
245+
throw contextError;
246+
}
247+
248+
const data: ApiChatCompletionResponse = JSON.parse(responseText);
221249
const content = data.choices[0]?.message?.content || '';
222250

251+
// Check if content is empty even with valid JSON structure
252+
if (!content.trim()) {
253+
const contextError = new Error('The request exceeds the available context size. Try increasing the context size or enable context shift.');
254+
contextError.name = 'ContextError';
255+
onError?.(contextError);
256+
throw contextError;
257+
}
258+
223259
onComplete?.(content);
224260

225261
return content;
226262
} catch (error) {
263+
// If it's already a ContextError, re-throw it
264+
if (error instanceof Error && error.name === 'ContextError') {
265+
throw error;
266+
}
267+
227268
const err = error instanceof Error ? error : new Error('Parse error');
228269

229270
onError?.(err);

tools/server/webui/src/lib/services/context.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { slotsService } from './slots';
2+
import { serverStore } from '$lib/stores/server.svelte';
3+
import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database';
4+
5+
export interface ContextCheckResult {
6+
wouldExceed: boolean;
7+
currentUsage: number;
8+
maxContext: number;
9+
availableTokens: number;
10+
reservedTokens: number;
11+
}
12+
13+
/**
14+
* Enhanced context service that uses real-time slots data for accurate context checking
15+
*/
16+
export class ContextService {
17+
private reserveTokens: number;
18+
19+
constructor(reserveTokens = 512) {
20+
this.reserveTokens = reserveTokens;
21+
}
22+
23+
/**
24+
* Check if sending a new message would exceed context limits using real-time slots data
25+
*/
26+
async checkContextLimit(): Promise<ContextCheckResult | null> {
27+
try {
28+
const currentState = await slotsService.getCurrentState();
29+
30+
if (!currentState) {
31+
return null;
32+
}
33+
34+
const maxContext = currentState.contextTotal;
35+
const currentUsage = currentState.contextUsed;
36+
const availableTokens = maxContext - currentUsage - this.reserveTokens;
37+
const wouldExceed = availableTokens <= 0;
38+
39+
return {
40+
wouldExceed,
41+
currentUsage,
42+
maxContext,
43+
availableTokens: Math.max(0, availableTokens),
44+
reservedTokens: this.reserveTokens
45+
};
46+
} catch (error) {
47+
console.warn('Error checking context limit:', error);
48+
return null;
49+
}
50+
}
51+
52+
/**
53+
* Get a formatted error message for context limit exceeded
54+
*/
55+
getContextErrorMessage(result: ContextCheckResult): string {
56+
const usagePercent = Math.round((result.currentUsage / result.maxContext) * 100);
57+
return `Context window is nearly full. Current usage: ${result.currentUsage.toLocaleString()}/${result.maxContext.toLocaleString()} tokens (${usagePercent}%). Available space: ${result.availableTokens.toLocaleString()} tokens (${result.reservedTokens} reserved for response).`;
58+
}
59+
60+
/**
61+
* Set the number of tokens to reserve for response generation
62+
*/
63+
setReserveTokens(tokens: number): void {
64+
this.reserveTokens = tokens;
65+
}
66+
}
67+
68+
// Global instance
69+
export const contextService = new ContextService();

0 commit comments

Comments
 (0)