|
/**
 * Default byte budget that `truncateGenAiMessages` passes to
 * `truncateMessagesByBytes` as `maxBytes`.
 */
export const DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT = 20000;
| 2 | + |
/**
 * Returns the number of bytes `str` occupies when encoded as UTF-8.
 *
 * The length is computed by walking the UTF-16 code units directly, so no
 * encoded copy of the string is allocated.
 *
 * A high surrogate is only treated as the first half of a 4-byte code point
 * when it is actually followed by a low surrogate. Unpaired surrogates are
 * counted as 3 bytes (the size of U+FFFD, which UTF-8 encoders substitute for
 * them) and never swallow the code unit that follows.
 *
 * @param str - The string to measure.
 * @returns The UTF-8 byte length of `str`.
 */
export function getByteSize(str: string): number {
  let bytes = 0;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code < 0x80) {
      bytes += 1;
    } else if (code < 0x800) {
      bytes += 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // `charCodeAt` yields NaN past the end, which fails both comparisons.
      const next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Valid surrogate pair: one supplementary code point, 4 bytes.
        i++;
        bytes += 4;
      } else {
        // Unpaired high surrogate.
        bytes += 3;
      }
    } else {
      // Remaining BMP characters and unpaired low surrogates.
      bytes += 3;
    }
  }
  return bytes;
}
| 20 | + |
/**
 * Returns the longest prefix of `str` whose UTF-8 encoding is at most
 * `maxBytes` bytes long.
 *
 * The string is scanned once from the front, so the cost is linear in the
 * length of the kept prefix rather than quadratic in the length of `str`.
 * The cut never falls between the two halves of a surrogate pair.
 *
 * @param str - The string to shorten.
 * @param maxBytes - Maximum UTF-8 byte length of the result.
 * @returns `str` itself when it already fits, otherwise the truncated prefix
 *   (possibly the empty string).
 */
function truncateStringByBytes(str: string, maxBytes: number): string {
  let bytes = 0;
  let end = 0;

  while (end < str.length) {
    const code = str.charCodeAt(end);
    let width = 3;
    let units = 1;

    if (code < 0x80) {
      width = 1;
    } else if (code < 0x800) {
      width = 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // `charCodeAt` yields NaN past the end, which fails both comparisons.
      const next = str.charCodeAt(end + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Keep or drop a surrogate pair as a whole.
        width = 4;
        units = 2;
      }
    }

    if (bytes + width > maxBytes) {
      break;
    }
    bytes += width;
    end += units;
  }

  return end === str.length ? str : str.slice(0, end);
}
|
4 | 32 |
|
/** Byte size of `value` as it is written when it is an element of a JSON array. */
function serializedElementByteSize(value: unknown): number {
  // `JSON.stringify` returns undefined for values such as `undefined` or
  // functions; inside an array those are written as `null`.
  const json: string | undefined = JSON.stringify(value);
  return getByteSize(json ?? 'null');
}

/** Narrows `value` to an object that carries a string `content` property. */
function hasStringContent(value: unknown): value is { content: string } {
  return (
    typeof value === 'object' &&
    value !== null &&
    'content' in value &&
    typeof (value as { content: unknown }).content === 'string'
  );
}

/**
 * Returns the longest prefix of `content` whose JSON-escaped form (without the
 * surrounding quotes) is at most `budget` bytes long.
 */
function truncateContentToJsonBudget(content: string, budget: number): string {
  const escapedSize = (s: string): number => getByteSize(JSON.stringify(s)) - 2;

  // Escaping only ever expands well-formed text, so the raw-byte cut is an
  // upper bound for what can fit and is the final answer when nothing in it
  // needs escaping.
  const candidate = truncateStringByBytes(content, budget);
  if (escapedSize(candidate) <= budget) {
    return candidate;
  }

  // Prefix of `candidate` ending at `end`, moved back by one code unit when
  // `end` would fall between the two halves of a surrogate pair.
  const prefix = (end: number): string => {
    const prev = candidate.charCodeAt(end - 1);
    const next = candidate.charCodeAt(end);
    const splitsPair = prev >= 0xd800 && prev <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    return candidate.slice(0, splitsPair ? end - 1 : end);
  };

  // Binary search for the largest cut whose escaped prefix still fits. The
  // empty prefix always fits because `budget` is positive here.
  let lo = 0;
  let hi = candidate.length;
  while (lo < hi) {
    const mid = Math.ceil((lo + hi) / 2);
    if (escapedSize(prefix(mid)) <= budget) {
      lo = mid;
    } else {
      hi = mid - 1;
    }
  }
  return prefix(lo);
}

/**
 * Shrinks `messages` so that `JSON.stringify` of the result is at most
 * `maxBytes` UTF-8 bytes long.
 *
 * Messages are dropped from the front of the array (the last message is kept
 * the longest). If only the last message remains and it is still too large,
 * its string `content` property is truncated so that the serialized result
 * fits. Size accounting includes the array brackets, the separating commas
 * and JSON escaping of the content.
 *
 * @param messages - Messages to truncate. The array itself is never mutated.
 * @param maxBytes - Maximum size of the serialized result in bytes.
 * @returns `messages` itself when it is empty, not an array, or already fits;
 *   otherwise a new array. The result is empty when the last message has no
 *   string `content`, or when no content bytes would be left after accounting
 *   for its other properties.
 */
export function truncateMessagesByBytes(messages: unknown[], maxBytes: number): unknown[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  let currentSize = getByteSize(JSON.stringify(messages));
  if (currentSize <= maxBytes) {
    return messages;
  }

  let startIndex = 0;
  while (startIndex < messages.length - 1 && currentSize > maxBytes) {
    // Dropping a leading element also removes the comma that follows it.
    currentSize -= serializedElementByteSize(messages[startIndex]) + 1;
    startIndex++;
  }

  const remainingMessages = messages.slice(startIndex);
  if (remainingMessages.length > 1 || currentSize <= maxBytes) {
    return remainingMessages;
  }

  // Only the last message is left and, wrapped in its array, it is still too large.
  const lastMessage = remainingMessages[0];
  if (!hasStringContent(lastMessage)) {
    return [];
  }

  // Bytes left for the escaped content once everything else is serialized.
  const availableContentBytes = maxBytes - getByteSize(JSON.stringify([{ ...lastMessage, content: '' }]));
  if (availableContentBytes <= 0) {
    return [];
  }

  const truncatedContent = truncateContentToJsonBudget(lastMessage.content, availableContentBytes);
  return [{ ...lastMessage, content: truncatedContent }];
}
|
32 | 79 |
|
33 |
| -export const DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT = 100000; |
34 | 80 |
|
35 | 81 | export function truncateGenAiMessages(messages: unknown[]): unknown[] {
|
36 | 82 | return truncateMessagesByBytes(messages, DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT);
|
|
0 commit comments