Skip to content

Commit 54cfe15

Browse files
committed
feat: Adds context length check before sending message (PoC)
Implements a check to ensure the message content and history do not exceed the model's context window limits. This prevents errors and improves the user experience by displaying an informative alert dialog when the context length would be exceeded, suggesting ways to shorten the message or start a new conversation. Also adds a method to clear the context error.
1 parent f3a8758 commit 54cfe15

File tree

3 files changed

+233
-1
lines changed

3 files changed

+233
-1
lines changed

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import type { DatabaseConversation, DatabaseMessage, DatabaseMessageExtra } from
44
import { goto } from '$app/navigation';
55
import { browser } from '$app/environment';
66
import { extractPartialThinking } from '$lib/utils/thinking';
7+
import { wouldExceedContextLength } from '$lib/utils/token-estimation';
8+
import { serverStore } from '$lib/stores/server.svelte';
79
import type { ChatMessageType, ChatRole } from '$lib/app';
810

911
class ChatStore {
@@ -13,6 +15,7 @@ class ChatStore {
1315
currentResponse = $state('');
1416
isInitialized = $state(false);
1517
isLoading = $state(false);
18+
contextError = $state<{ message: string; estimatedTokens: number; maxAllowed: number; maxContext: number } | null>(null);
1619
private chatService = new ChatService();
1720

1821
constructor() {
@@ -209,6 +212,32 @@ class ChatStore {
209212
async sendMessage(content: string, extras?: DatabaseMessageExtra[]): Promise<void> {
210213
if (!content.trim() || this.isLoading) return;
211214

215+
// Check context length BEFORE creating conversation or processing anything
216+
const maxContextLength = serverStore.serverProps?.default_generation_settings.n_ctx;
217+
if (maxContextLength) {
218+
const contextCheck = wouldExceedContextLength(
219+
this.activeMessages,
220+
content,
221+
extras,
222+
maxContextLength
223+
);
224+
225+
if (contextCheck.wouldExceed) {
226+
const errorMessage = `Message too long for context window. Estimated tokens: ${contextCheck.estimatedTokens.toLocaleString()}, Maximum allowed: ${contextCheck.maxAllowed.toLocaleString()} (Context: ${maxContextLength.toLocaleString()})`;
227+
console.error('Context length exceeded:', errorMessage);
228+
229+
// Set context error state for UI to display alert dialog
230+
this.contextError = {
231+
message: errorMessage,
232+
estimatedTokens: contextCheck.estimatedTokens,
233+
maxAllowed: contextCheck.maxAllowed,
234+
maxContext: maxContextLength
235+
};
236+
// Early return - prevent any conversation creation or message processing
237+
return;
238+
}
239+
}
240+
212241
let isNewConversation = false;
213242

214243
if (!this.activeConversation) {
@@ -278,6 +307,13 @@ class ChatStore {
278307
this.currentResponse = '';
279308
}
280309

310+
/**
311+
* Clear the context error state by resetting `contextError` to null.
* `sendMessage` sets `contextError` when its context-length check rejects a message;
* the layout's alert dialog is open while the value is non-null, so calling this dismisses it.
312+
*/
313+
clearContextError(): void {
314+
this.contextError = null;
315+
}
316+
281317
private async savePartialResponseIfNeeded() {
282318
if (!this.currentResponse.trim() || !this.activeMessages.length) {
283319
return;
@@ -507,6 +543,7 @@ export const activeMessages = () => chatStore.activeMessages;
507543
export const isLoading = () => chatStore.isLoading;
508544
export const currentResponse = () => chatStore.currentResponse;
509545
export const isInitialized = () => chatStore.isInitialized;
546+
export const contextError = () => chatStore.contextError;
510547

511548
export const createConversation = chatStore.createConversation.bind(chatStore);
512549
export const loadConversation = chatStore.loadConversation.bind(chatStore);
@@ -517,6 +554,7 @@ export const updateConversationName = chatStore.updateConversationName.bind(chat
517554
export const deleteConversation = chatStore.deleteConversation.bind(chatStore);
518555
export const clearActiveConversation = chatStore.clearActiveConversation.bind(chatStore);
519556
export const gracefulStop = chatStore.gracefulStop.bind(chatStore);
557+
export const clearContextError = chatStore.clearContextError.bind(chatStore);
520558

521559
export function stopGeneration() {
522560
chatStore.stopGeneration();
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/**
2+
* Token estimation utilities for context length validation
3+
*/
4+
5+
import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database';
6+
import type { ApiChatMessageData } from '$lib/types/api';
7+
8+
/**
 * Matches a single Han, Hiragana, Katakana or Hangul character.
 * These scripts typically tokenize at roughly one token (or more) per character,
 * so the ~4 characters-per-token rule would badly under-count them.
 */
const DENSE_SCRIPT_CHAR =
	/[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/gu;

/**
 * Rough token estimation based on character count.
 *
 * Whitespace is trimmed and collapsed first. Characters from CJK-family scripts
 * (Han, Hiragana, Katakana, Hangul) are counted as one token each; all remaining
 * text uses the ~4 characters-per-token rule of thumb. This is a heuristic, not a
 * tokenizer: it only aims to be a reasonable guardrail and may be off for any
 * particular model.
 *
 * @param text - Raw text to estimate; empty or missing text counts as 0 tokens.
 * @returns Estimated number of tokens (a non-negative integer).
 */
export function estimateTokenCount(text: string): number {
	if (!text) return 0;

	// Remove extra whitespace and normalize
	const normalizedText = text.trim().replace(/\s+/g, ' ');

	// Strip dense-script characters while counting them, so the remainder can be
	// measured with the characters-per-token heuristic.
	let denseChars = 0;
	const remainder = normalizedText.replace(DENSE_SCRIPT_CHAR, () => {
		denseChars += 1;
		return '';
	});

	// ~4 characters per token for everything else, rounded up
	return denseChars + Math.ceil(remainder.length / 4);
}
23+
24+
/**
 * Estimate how many tokens one stored message contributes, attachments included.
 *
 * The message body is estimated with `estimateTokenCount`. Each entry in
 * `message.extra` then adds:
 * - `textFile` / `pdfFile`: the estimated tokens of its content plus a short file-name header
 * - `imageFile`: a flat 200 tokens
 * - anything else: a flat 10 tokens
 *
 * @param message - The message whose content and extras are measured.
 * @returns Estimated token total for the message.
 */
export function estimateMessageTokens(message: DatabaseMessage): number {
	const bodyTokens = estimateTokenCount(message.content);

	// No attachments: the body is the whole cost.
	if (!message.extra) {
		return bodyTokens;
	}

	let attachmentTokens = 0;

	for (const attachment of message.extra) {
		if (attachment.type === 'textFile') {
			// Full file text plus the header line that names the file
			attachmentTokens +=
				estimateTokenCount(attachment.content) +
				estimateTokenCount(`--- File: ${attachment.name} ---`);
		} else if (attachment.type === 'pdfFile') {
			// PDF content plus its header line
			attachmentTokens +=
				estimateTokenCount(attachment.content) +
				estimateTokenCount(`--- PDF File: ${attachment.name} ---`);
		} else if (attachment.type === 'imageFile') {
			// Flat per-image allowance
			attachmentTokens += 200;
		} else {
			// Small flat allowance for any other attachment type
			attachmentTokens += 10;
		}
	}

	return bodyTokens + attachmentTokens;
}
63+
64+
/**
 * Estimate the token total for an entire conversation.
 *
 * Starts from a fixed 50-token allowance for the chat template / system formatting,
 * then adds each message's estimate plus 10 tokens of per-message overhead
 * (role formatting and message structure).
 *
 * @param messages - Messages in the conversation; an empty list yields just the fixed allowance.
 * @returns Estimated token total for the conversation.
 */
export function estimateConversationTokens(messages: DatabaseMessage[]): number {
	const templateOverhead = 50;
	const perMessageOverhead = 10;

	let runningTotal = templateOverhead;

	for (const entry of messages) {
		runningTotal += perMessageOverhead + estimateMessageTokens(entry);
	}

	return runningTotal;
}
82+
83+
/**
 * Estimate the tokens a not-yet-sent message would add, attachments included.
 *
 * The result is the estimated tokens of `content`, plus a per-attachment cost
 * (file content and a name header for text/PDF files, 200 for an image,
 * 10 for any other type), plus a flat 10 tokens of message-formatting overhead.
 *
 * @param content - Text of the message about to be sent.
 * @param extras - Optional attachments accompanying the message.
 * @returns Estimated token total for the new message.
 */
export function estimateNewMessageTokens(content: string, extras?: DatabaseMessageExtra[]): number {
	// Cost of a single attachment, by attachment type
	const attachmentCost = (item: DatabaseMessageExtra): number => {
		switch (item.type) {
			case 'textFile':
				return (
					estimateTokenCount(item.content) + estimateTokenCount(`--- File: ${item.name} ---`)
				);
			case 'pdfFile':
				return (
					estimateTokenCount(item.content) + estimateTokenCount(`--- PDF File: ${item.name} ---`)
				);
			case 'imageFile':
				return 200; // Flat per-image allowance
			default:
				return 10;
		}
	};

	const formattingOverhead = 10;
	let runningTotal = estimateTokenCount(content);

	if (extras) {
		for (const item of extras) {
			runningTotal += attachmentCost(item);
		}
	}

	return runningTotal + formattingOverhead;
}
115+
116+
/**
 * Check if adding a new message would exceed the context length.
 *
 * The estimate is the existing conversation (`estimateConversationTokens`) plus the
 * new message (`estimateNewMessageTokens`). The budget is `maxContextLength` minus a
 * reserve kept free for the model's response.
 *
 * The reserve is clamped to the range [0, half of `maxContextLength`]. Without the
 * clamp, a context window no larger than the reserve would give a zero or negative
 * budget, so every message would be rejected and a non-positive "maximum allowed"
 * would be reported. The budget itself is never reported below 0.
 *
 * @param existingMessages - Messages already in the conversation.
 * @param newMessageContent - Text of the message about to be sent.
 * @param newMessageExtras - Attachments for the new message, if any.
 * @param maxContextLength - Size of the model's context window, in tokens.
 * @param reserveTokens - Tokens to keep free for response generation (default 512).
 * @returns `wouldExceed` (estimate is above the budget), `estimatedTokens` (the estimate)
 *          and `maxAllowed` (the budget after subtracting the effective reserve).
 */
export function wouldExceedContextLength(
	existingMessages: DatabaseMessage[],
	newMessageContent: string,
	newMessageExtras: DatabaseMessageExtra[] | undefined,
	maxContextLength: number,
	reserveTokens: number = 512 // Reserve tokens for response generation
): { wouldExceed: boolean; estimatedTokens: number; maxAllowed: number } {
	const existingTokens = estimateConversationTokens(existingMessages);
	const newMessageTokens = estimateNewMessageTokens(newMessageContent, newMessageExtras);
	const totalEstimatedTokens = existingTokens + newMessageTokens;

	// Never let the reserve swallow more than half the window (or go negative),
	// so small context sizes still leave room for a prompt.
	const effectiveReserve = Math.min(
		Math.max(reserveTokens, 0),
		Math.floor(maxContextLength / 2)
	);
	const maxAllowedTokens = Math.max(maxContextLength - effectiveReserve, 0);

	return {
		wouldExceed: totalEstimatedTokens > maxAllowedTokens,
		estimatedTokens: totalEstimatedTokens,
		maxAllowed: maxAllowedTokens
	};
}

tools/server/webui/src/routes/+layout.svelte

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
import { ModeWatcher } from 'mode-watcher';
44
import { Toaster } from 'svelte-sonner';
55
import { ChatSidebar } from '$lib/components/app';
6-
import { activeMessages, isLoading } from '$lib/stores/chat.svelte';
6+
import { activeMessages, isLoading, contextError, clearContextError } from '$lib/stores/chat.svelte';
77
import { serverStore } from '$lib/stores/server.svelte';
88
import { page } from '$app/state';
99
import * as Sidebar from '$lib/components/ui/sidebar/index.js';
10+
import * as AlertDialog from '$lib/components/ui/alert-dialog';
11+
import { AlertTriangle } from '@lucide/svelte';
1012
1113
let { children } = $props();
1214
@@ -42,6 +44,62 @@
4244

4345
<Toaster richColors />
4446

47+
<!-- Context Length Error Alert Dialog -->
48+
<AlertDialog.Root
49+
open={contextError() !== null}
50+
onOpenChange={(open) => !open && clearContextError()}
51+
>
52+
<AlertDialog.Content>
53+
<AlertDialog.Header>
54+
<AlertDialog.Title class="flex items-center gap-2">
55+
<AlertTriangle class="text-destructive h-5 w-5" />
56+
Message Too Long
57+
</AlertDialog.Title>
58+
<AlertDialog.Description>
59+
Your message exceeds the model's context window and cannot be processed.
60+
</AlertDialog.Description>
61+
</AlertDialog.Header>
62+
63+
{#if contextError()}
64+
<div class="space-y-3 text-sm">
65+
<div class="bg-muted rounded-lg p-3">
66+
<div class="mb-2 font-medium">Token Usage:</div>
67+
<div class="text-muted-foreground space-y-1">
68+
<div>
69+
Estimated tokens: <span class="font-mono"
70+
>{contextError()?.estimatedTokens.toLocaleString()}</span
71+
>
72+
</div>
73+
<div>
74+
Maximum allowed: <span class="font-mono"
75+
>{contextError()?.maxAllowed.toLocaleString()}</span
76+
>
77+
</div>
78+
<div>
79+
Context window: <span class="font-mono"
80+
>{contextError()?.maxContext.toLocaleString()}</span
81+
>
82+
</div>
83+
</div>
84+
</div>
85+
86+
<div>
87+
<div class="mb-2 font-medium">Suggestions:</div>
88+
<ul class="text-muted-foreground list-inside list-disc space-y-1">
89+
<li>Shorten your message</li>
90+
<li>Remove some file attachments</li>
91+
<li>Start a new conversation</li>
92+
</ul>
93+
</div>
94+
</div>
95+
{/if}
96+
97+
<AlertDialog.Footer>
98+
<AlertDialog.Action onclick={() => clearContextError()}>Got it</AlertDialog.Action>
99+
</AlertDialog.Footer>
100+
</AlertDialog.Content>
101+
</AlertDialog.Root>
102+
45103
<Sidebar.Provider bind:open={sidebarOpen}>
46104
<div class="flex h-screen w-full">
47105
<Sidebar.Root class="h-full">

0 commit comments

Comments
 (0)