# Chat Context Shift Strategy {#background}
When the chat history gets longer than the sequence's context size, we have to remove the oldest tokens from the context state to make room for new tokens to be generated.
This is called a context shift.

`node-llama-cpp` has a smart mechanism to handle context shifts on the chat level, so the oldest messages are truncated (from their beginning) or removed from the context state, while keeping the system prompt in place to ensure the model follows the guidelines you set for it.

You can override `node-llama-cpp`'s default context shift strategy
when using [`LlamaChatSession`](../api/classes/LlamaChatSession.md) or [`LlamaChat`](../api/classes/LlamaChat.md)
by providing a custom context shift strategy.

## The Default Context Shift Strategy {#default-strategy}
The [default context shift strategy](../api/type-aliases/LLamaChatContextShiftOptions.md#strategy) is `eraseFirstResponseAndKeepFirstSystem`.

This strategy attempts to truncate the oldest model responses (from their beginning) or remove them completely from the chat history while keeping the first system prompt in place.
If a response is completely removed, the prompt that came before it will be removed as well.
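
If you want to make this behavior explicit in your code, you can select the strategy by name. A minimal sketch, assuming the same `context` setup as in the full example below:
```typescript
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    contextShift: {
        // explicitly select the default strategy by name
        strategy: "eraseFirstResponseAndKeepFirstSystem"
    }
});
```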

## Implementing a Custom Context Shift Strategy {#custom-strategy}
A [custom context shift strategy](../api/type-aliases/LLamaChatContextShiftOptions.md#strategy) is a function that receives the full chat history as input and
returns a new chat history that, when tokenized, will result in an array of tokens shorter than the desired max size.

The context shift strategy will be called only when the context state needs to be shifted.

If the context shift strategy returns an invalid chat history (e.g., a chat history that is too long),
the prompting function will abort the evaluation and throw an error.

A custom context shift strategy can use simple logic that prioritizes which data to remove,
or it can even use a language model to summarize information to shorten the chat history.

It's important to keep the last user prompt and model response as-is to prevent infinite generation loops.

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "models", "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf")
});
const context = await model.createContext();

// ---cut---
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    contextShift: {
        strategy({
            chatHistory, chatWrapper, maxTokensCount, tokenizer,
            lastShiftMetadata
        }) {
            // clone the chat history to not mutate the original
            const newChatHistory = chatHistory.map(
                (item) => structuredClone(item)
            );

            function getTokensLeftToRemove() {
                // measure the trimmed history, so removals
                // are reflected in the token count
                const {contextText} = chatWrapper.generateContextState({
                    chatHistory: newChatHistory
                });
                const tokenUsage = contextText.tokenize(tokenizer).length;

                return Math.max(0, tokenUsage - maxTokensCount);
            }

            while (getTokensLeftToRemove() > 0 && newChatHistory.length > 2) {
                const previousLength = newChatHistory.length;

                for (let i = 0; i < newChatHistory.length - 2; i++) {
                    const chatItem = newChatHistory[i]!;

                    if (i === 0 && chatItem.type === "system")
                        // don't remove the first system message
                        continue;
                    else if (chatItem.type === "model") {
                        // remove the model response
                        newChatHistory.splice(i, 1);
                        i--;

                        // remove the user messages that
                        // came before the model response
                        while (
                            i >= 0 &&
                            newChatHistory[i]?.type === "user"
                        ) {
                            newChatHistory.splice(i, 1);
                            i--;
                        }
                    } else if (chatItem.type === "system") {
                        // don't remove system messages on their own
                        continue;
                    } else if (chatItem.type === "user") {
                        // don't remove user messages on their own
                        continue;
                    } else {
                        // ensure we handle all message types.
                        // otherwise, this will error
                        void (chatItem satisfies never);
                    }
                }

                if (newChatHistory.length === previousLength)
                    // nothing removable is left;
                    // stop to avoid an infinite loop
                    break;
            }

            return {
                chatHistory: newChatHistory,

                // this metadata will be passed to the next context shift
                // strategy call as the `lastShiftMetadata` argument
                metadata: {}
            };
        }
    }
});
```
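
Note two design choices in this example: the loops never touch the last two items in the chat history, so the latest user prompt and model response are kept as-is (preventing the infinite generation loops mentioned above), and the returned `metadata` object is passed to the next strategy call as `lastShiftMetadata`, which lets you carry state between consecutive context shifts.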