From 808357326c9286ecee506180f47f3b706f22bc49 Mon Sep 17 00:00:00 2001
From: Anivar A Aravind <anivar@foodhub.com>
Date: Mon, 21 Jul 2025 02:21:40 +0530
Subject: [PATCH] Fix critical KV cache crash bug causing std::length_error

Resolves issue #771, where the server crashes with std::length_error when
KV cache context shifting resizes the cache_tokens vector to a size
computed by an unsigned subtraction that wraps around.

The bug occurs in update_slots() when n_discard > cache_tokens.size():
cache_tokens.size() - n_discard is evaluated as an unsigned size_t, so it
wraps around to a huge value and cache_tokens.resize() requests a massive
allocation, triggering a std::length_error exception.

Changes:
- Add bounds checking before cache_tokens.resize() in server.cpp:1714
- Clear cache_tokens when n_discard >= cache_tokens.size(), where the
  subtraction would yield zero or wrap around
- Also clear cache_tokens when n_discard is negative instead of resizing
  with it

This fix prevents production server crashes reported with Chinese text
translation workloads and high memory pressure scenarios.
---
 llama.cpp/server/server.cpp |  7 ++++++-
 test_kv_cache_fix.cpp       | 41 +++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 test_kv_cache_fix.cpp
diff --git a/llama.cpp/server/server.cpp b/llama.cpp/server/server.cpp
index 555b0e1098..2373e7d5b6 100644
--- a/llama.cpp/server/server.cpp
+++ b/llama.cpp/server/server.cpp
@@ -1711,7 +1711,12 @@ struct llama_server_context
                         slot.cache_tokens[i - n_discard] = slot.cache_tokens[i];
                     }
 
-                    slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                    // Guard the unsigned subtraction below: if n_discard exceeded size(), it would wrap and resize() would throw std::length_error
+                    if (n_discard >= 0 && (size_t)n_discard < slot.cache_tokens.size()) {
+                        slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                    } else {
+                        slot.cache_tokens.clear();
+                    }
 
                     slot.n_past -= n_discard;
 
diff --git a/test_kv_cache_fix.cpp b/test_kv_cache_fix.cpp
new file mode 100644
index 0000000000..49650348c7
--- /dev/null
+++ b/test_kv_cache_fix.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <vector>
+#include <cassert>
+
+// Self-contained check of the guard logic: re-implements the resize-or-clear branch on a local vector (it does not call into server.cpp)
+void test_cache_tokens_resize_fix() {
+    std::cout << "Testing KV cache resize fix..." << std::endl;
+    
+    // Five-element baseline vector; every case below starts from a fresh copy of it
+    std::vector<int> cache_tokens = {1, 2, 3, 4, 5};
+    size_t original_size = cache_tokens.size();
+    
+    // Discard counts covering: negative, zero, in-range, equal to size, larger than size
+    int n_discard_cases[] = {-1, 0, 3, 5, 10};
+    
+    for (int n_discard : n_discard_cases) {
+        std::vector<int> test_tokens = cache_tokens;
+        
+        std::cout << "Testing n_discard = " << n_discard 
+                  << " with cache size = " << test_tokens.size() << std::endl;
+        
+        // Same condition as the server.cpp guard: shrink only when 0 <= n_discard < size, otherwise empty the vector
+        if (n_discard >= 0 && (size_t)n_discard < test_tokens.size()) {
+            test_tokens.resize(test_tokens.size() - n_discard);
+            std::cout << "  Resized to: " << test_tokens.size() << std::endl;
+        } else {
+            test_tokens.clear();
+            std::cout << "  Cleared to: " << test_tokens.size() << std::endl;
+        }
+        
+        // Weak invariant only: the vector never grew; the exact resulting sizes are printed above but not asserted
+        assert(test_tokens.size() <= original_size);
+    }
+    
+    std::cout << "All test cases passed! KV cache fix works correctly." << std::endl;
+}
+
+int main() {
+    test_cache_tokens_resize_fix();  // prints each case; its assert is only active when NDEBUG is not defined
+    return 0;
+}
\ No newline at end of file