From 808357326c9286ecee506180f47f3b706f22bc49 Mon Sep 17 00:00:00 2001
From: Anivar A Aravind
Date: Mon, 21 Jul 2025 02:21:40 +0530
Subject: [PATCH] Fix critical KV cache crash bug causing std::length_error

Resolves issue #771 where server crashes with std::length_error when KV cache
context shifting attempts to resize cache_tokens vector with integer underflow.

The bug occurs in update_slots() when n_discard > cache_tokens.size(),
causing cache_tokens.resize(size - n_discard) to underflow and request
massive memory allocation, triggering std::length_error exception.

Changes:
- Add bounds checking before cache_tokens.resize() in server.cpp:1714
- Clear cache_tokens when n_discard would cause underflow
- Prevent negative n_discard values from causing issues

This fix prevents production server crashes reported with Chinese text
translation workloads and high memory pressure scenarios.
---
NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
extract. The leading indentation in the server.cpp hunk is reconstructed and
the blob ids on the "index" lines were not recomputed, so apply it with
`git apply --ignore-whitespace` and confirm the result against the tree.
NOTE(review): the author e-mail address is missing from the From: header in
that extract; restore it before feeding this file to `git am`.
NOTE(review): `slot.n_past -= n_discard;` (context line below the hunk) still
runs when the cache is cleared - confirm n_past stays consistent in that
branch.

 llama.cpp/server/server.cpp |  7 ++++++-
 test_kv_cache_fix.cpp       | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 test_kv_cache_fix.cpp

diff --git a/llama.cpp/server/server.cpp b/llama.cpp/server/server.cpp
index 555b0e1098..2373e7d5b6 100644
--- a/llama.cpp/server/server.cpp
+++ b/llama.cpp/server/server.cpp
@@ -1711,7 +1711,12 @@ struct llama_server_context
                     slot.cache_tokens[i - n_discard] = slot.cache_tokens[i];
                 }
 
-                slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                // Prevent integer underflow that causes std::length_error
+                if (n_discard >= 0 && (size_t)n_discard < slot.cache_tokens.size()) {
+                    slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                } else {
+                    slot.cache_tokens.clear();
+                }
 
                 slot.n_past -= n_discard;
 
diff --git a/test_kv_cache_fix.cpp b/test_kv_cache_fix.cpp
new file mode 100644
index 0000000000..49650348c7
--- /dev/null
+++ b/test_kv_cache_fix.cpp
@@ -0,0 +1,49 @@
+// Keep assert() active even when the build defines NDEBUG.
+#undef NDEBUG
+#include <cassert>
+#include <cstddef>
+#include <iostream>
+#include <vector>
+
+// Test for the KV cache crash fix
+void test_cache_tokens_resize_fix() {
+    std::cout << "Testing KV cache resize fix..." << std::endl;
+
+    // Simulate the problematic condition
+    const std::vector<int> cache_tokens = {1, 2, 3, 4, 5};
+
+    // n_discard values that could cause integer underflow, each paired with
+    // the cache size the fixed logic must leave behind
+    struct test_case {
+        int    n_discard;
+        size_t expected_size;
+    };
+    const test_case cases[] = {{-1, 0}, {0, 5}, {3, 2}, {5, 0}, {10, 0}};
+
+    for (const test_case & tc : cases) {
+        const int n_discard = tc.n_discard;
+        std::vector<int> test_tokens = cache_tokens;
+
+        std::cout << "Testing n_discard = " << n_discard
+                  << " with cache size = " << test_tokens.size() << std::endl;
+
+        // Apply the fixed logic
+        if (n_discard >= 0 && (size_t)n_discard < test_tokens.size()) {
+            test_tokens.resize(test_tokens.size() - n_discard);
+            std::cout << " Resized to: " << test_tokens.size() << std::endl;
+        } else {
+            test_tokens.clear();
+            std::cout << " Cleared to: " << test_tokens.size() << std::endl;
+        }
+
+        // Verify the exact resulting size, not merely that nothing crashed
+        assert(test_tokens.size() == tc.expected_size);
+    }
+
+    std::cout << "All test cases passed! KV cache fix works correctly." << std::endl;
+}
+
+int main() {
+    test_cache_tokens_resize_fix();
+    return 0;
+}