Fix critical KV cache crash bug causing std::length_error

anivar · anivar · commit 1f9e927ac5d5 · 2025-07-21T02:17:09.000+05:30
Resolves issue #771, where the server crashes with std::length_error during KV cache context shifting. In update_slots(), when n_discard is larger than cache_tokens.size(), the unsigned subtraction in cache_tokens.resize(size - n_discard) wraps around (integer underflow) and requests a massive memory allocation, which triggers the std::length_error exception.
Changes:
- Add bounds checking before cache_tokens.resize() in server.cpp:1714
- Clear cache_tokens when n_discard would cause underflow
- Prevent negative n_discard values from causing issues
This fix prevents the production server crashes reported with Chinese text translation workloads and high memory pressure scenarios.
diff --git a/llama.cpp/server/server.cpp b/llama.cpp/server/server.cpp
@@ -1711,7 +1711,12 @@ struct llama_server_context
                         slot.cache_tokens[i - n_discard] = slot.cache_tokens[i];
                     }
 
-                    slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                    // Prevent integer underflow that causes std::length_error
+                    if (n_discard >= 0 && (size_t)n_discard < slot.cache_tokens.size()) {
+                        slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
+                    } else {
+                        slot.cache_tokens.clear();
+                    }
 
                     slot.n_past -= n_discard;
 
diff --git a/test_kv_cache_fix b/test_kv_cache_fix
diff --git a/test_kv_cache_fix.cpp b/test_kv_cache_fix.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <vector>
+#include <cassert>
+
+// Simulated test: re-applies the guarded resize from the server.cpp change to a local vector; it does not call any server code.
+void test_cache_tokens_resize_fix() {
+    std::cout << "Testing KV cache resize fix..." << std::endl;
+    
+    // Five-element vector standing in for a slot's cache_tokens
+    std::vector<int> cache_tokens = {1, 2, 3, 4, 5};
+    size_t original_size = cache_tokens.size();
+    
+    // Discard counts covering: negative, zero, below size, equal to size, above size
+    int n_discard_cases[] = {-1, 0, 3, 5, 10};
+    
+    for (int n_discard : n_discard_cases) {
+        std::vector<int> test_tokens = cache_tokens;  // fresh copy so every case starts from the same 5 elements
+        
+        std::cout << "Testing n_discard = " << n_discard 
+                  << " with cache size = " << test_tokens.size() << std::endl;
+        
+        // Same guard as the fix: shrink only when 0 <= n_discard < size, otherwise clear the vector
+        if (n_discard >= 0 && (size_t)n_discard < test_tokens.size()) {
+            test_tokens.resize(test_tokens.size() - n_discard);
+            std::cout << "  Resized to: " << test_tokens.size() << std::endl;
+        } else {
+            test_tokens.clear();
+            std::cout << "  Cleared to: " << test_tokens.size() << std::endl;
+        }
+        
+        // Only checks that the vector did not grow; the exact resulting sizes are printed above, not asserted
+        assert(test_tokens.size() <= original_size);
+    }
+    
+    std::cout << "All test cases passed! KV cache fix works correctly." << std::endl;
+}
+
+int main() {  // runs the single simulated test, then returns exit status 0
+    test_cache_tokens_resize_fix();
+    return 0;
+}