// Memory leak regression tests for the llama.cpp C API.
//
// Each test repeatedly creates and destroys one class of objects (backend,
// model, context, sampler, batch, KV memory) so that leaks show up as
// growth under ASan/LSan, valgrind, or similar tools.

#include "llama.h"
#include "get-model.h"
#include <cstdio>
#include <thread>
#include <vector>
#include <atomic>

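// Number of failed sub-tests; main() turns this into the process exit code
// so that a FAILED sub-test cannot be reported as overall success.
static int g_fail_count = 0;
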
static void test_model_load_unload_cycles(const char * model_path) {
    fprintf(stderr, "test_model_load_unload_cycles: ");

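    // A full backend + model init/free cycle per iteration; any state that
    // survives llama_backend_free() shows up as growth across iterations.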
    for (int i = 0; i < 10; i++) {
        llama_backend_init();

        auto params = llama_model_default_params();
        auto * model = llama_model_load_from_file(model_path, params);
        if (model == nullptr) {
            fprintf(stderr, "FAILED (model load failed on iteration %d)\n", i);
            g_fail_count++;
            llama_backend_free();
            return;
        }

        llama_model_free(model);
        llama_backend_free();
    }

    fprintf(stderr, "OK\n");
}

static void test_context_lifecycle(const char * model_path) {
    fprintf(stderr, "test_context_lifecycle: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

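    // Repeatedly create and destroy contexts against a single loaded model;
    // each context allocates its own KV cache and compute buffers.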
    for (int i = 0; i < 10; i++) {
        auto ctx_params = llama_context_default_params();
        ctx_params.n_ctx = 512;

        auto * ctx = llama_init_from_model(model, ctx_params);
        if (ctx == nullptr) {
            fprintf(stderr, "FAILED (context creation failed on iteration %d)\n", i);
            g_fail_count++;
            llama_model_free(model);
            llama_backend_free();
            return;
        }

        llama_free(ctx);
    }

    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_multiple_contexts_same_model(const char * model_path) {
    fprintf(stderr, "test_multiple_contexts_same_model: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

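    // Several live contexts can share one model: the weights are loaded once
    // and each context only adds its own per-context buffers.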
    const int num_contexts = 5;
    std::vector<llama_context *> contexts(num_contexts);

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;

    for (int i = 0; i < num_contexts; i++) {
        contexts[i] = llama_init_from_model(model, ctx_params);
        if (contexts[i] == nullptr) {
            fprintf(stderr, "FAILED (context %d creation failed)\n", i);
            g_fail_count++;
            // free the contexts that were created before the failure
            for (int j = 0; j < i; j++) {
                llama_free(contexts[j]);
            }
            llama_model_free(model);
            llama_backend_free();
            return;
        }
    }

    for (auto * ctx : contexts) {
        llama_free(ctx);
    }

    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_sampler_lifecycle(const char * model_path) {
    fprintf(stderr, "test_sampler_lifecycle: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        g_fail_count++;
        llama_model_free(model);
        llama_backend_free();
        return;
    }

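    // The chain takes ownership of samplers added to it, so a single
    // llama_sampler_free(smpl) also frees the greedy sampler; freeing it
    // separately would be a double free.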
    for (int i = 0; i < 10; i++) {
        auto sparams = llama_sampler_chain_default_params();
        auto * smpl = llama_sampler_chain_init(sparams);
        if (smpl == nullptr) {
            fprintf(stderr, "FAILED (sampler creation failed on iteration %d)\n", i);
            g_fail_count++;
            llama_free(ctx);
            llama_model_free(model);
            llama_backend_free();
            return;
        }

        llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
        llama_sampler_free(smpl);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_error_condition_cleanup(const char * /* model_path */) {
    fprintf(stderr, "test_error_condition_cleanup: ");

    llama_backend_init();

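    // A load from a nonexistent path must fail cleanly: it should return
    // nullptr without leaking partially initialized model state.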
    auto params = llama_model_default_params();
    auto * model = llama_model_load_from_file("/nonexistent/path/to/model.gguf", params);
    if (model != nullptr) {
        fprintf(stderr, "FAILED (expected nullptr for nonexistent model)\n");
        g_fail_count++;
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_model_load_cancel(const char * model_path) {
    fprintf(stderr, "test_model_load_cancel: ");

    llama_backend_init();

    auto params = llama_model_default_params();
    params.use_mmap = false;
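    // The progress callback returns true to continue loading and false to
    // abort it, so this lambda cancels the load while progress <= 0.5.
    // mmap is disabled so the abort path exercises real allocations.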
    params.progress_callback = [](float progress, void * ctx) {
        (void) ctx;
        return progress > 0.50f;
    };

    auto * model = llama_model_load_from_file(model_path, params);

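    // Cancellation is best-effort: if the load completed before the callback
    // aborted it, free the model rather than leak it.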
    if (model != nullptr) {
        llama_model_free(model);
    }

    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_batch_operations(const char * model_path) {
    fprintf(stderr, "test_batch_operations: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        g_fail_count++;
        llama_model_free(model);
        llama_backend_free();
        return;
    }

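    // llama_batch_init(32, 0, 1): room for 32 tokens, embd = 0 (token ids
    // rather than embeddings), at most one sequence id per token; every init
    // must be paired with llama_batch_free.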
    for (int i = 0; i < 10; i++) {
        llama_batch batch = llama_batch_init(32, 0, 1);

        llama_batch_free(batch);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_backend_init_free_cycles() {
    fprintf(stderr, "test_backend_init_free_cycles: ");

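    // Backend init/free with no model in between; catches leaks in global
    // backend registration and teardown.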
    for (int i = 0; i < 10; i++) {
        llama_backend_init();
        llama_backend_free();
    }

    fprintf(stderr, "OK\n");
}

static void test_threaded_contexts(const char * model_path) {
    fprintf(stderr, "test_threaded_contexts: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

    std::atomic<bool> failed{false};
    std::vector<std::thread> threads;
    const int num_threads = 3;

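    // Each thread builds and tears down its own context and sampler. A
    // llama_context is not thread-safe, but sharing one immutable model
    // across threads is supported.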
    for (int t = 0; t < num_threads; t++) {
        threads.emplace_back([&failed, model]() {
            auto ctx_params = llama_context_default_params();
            ctx_params.n_ctx = 512;

            auto * ctx = llama_init_from_model(model, ctx_params);
            if (ctx == nullptr) {
                failed.store(true);
                return;
            }

            auto sparams = llama_sampler_chain_default_params();
            auto * smpl = llama_sampler_chain_init(sparams);
            if (smpl == nullptr) {
                llama_free(ctx);
                failed.store(true);
                return;
            }

            llama_sampler_chain_add(smpl, llama_sampler_init_greedy());

            llama_sampler_free(smpl);
            llama_free(ctx);
        });
    }

    for (auto & thread : threads) {
        thread.join();
    }

    llama_model_free(model);
    llama_backend_free();

    if (failed) {
        fprintf(stderr, "FAILED (thread error)\n");
        g_fail_count++;
    } else {
        fprintf(stderr, "OK\n");
    }
}

static void test_kv_cache_clear_operations(const char * model_path) {
    fprintf(stderr, "test_kv_cache_clear_operations: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        g_fail_count++;
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        g_fail_count++;
        llama_model_free(model);
        llama_backend_free();
        return;
    }

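    // llama_memory_clear(mem, false) clears the KV cell metadata only; pass
    // true to also clear the underlying data buffers. The handle returned by
    // llama_get_memory is owned by the context and is not freed separately.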
    for (int i = 0; i < 10; i++) {
        llama_memory_t mem = llama_get_memory(ctx);
        llama_memory_clear(mem, false);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

int main(int argc, char ** argv) {
    auto * model_path = get_model_or_exit(argc, argv);

    fprintf(stderr, "Running memory leak regression tests...\n\n");

    test_backend_init_free_cycles();
    test_model_load_unload_cycles(model_path);
    test_context_lifecycle(model_path);
    test_multiple_contexts_same_model(model_path);
    test_sampler_lifecycle(model_path);
    test_batch_operations(model_path);
    test_kv_cache_clear_operations(model_path);
    test_threaded_contexts(model_path);
    test_model_load_cancel(model_path);
    test_error_condition_cleanup(model_path);

    if (g_fail_count > 0) {
        fprintf(stderr, "\n%d memory leak test(s) FAILED\n", g_fail_count);
        return 1;
    }

    fprintf(stderr, "\nAll memory leak tests completed successfully!\n");

    return 0;
}