Commit d54e7d8
Add comprehensive memory leak testing
- Add test-memory-leaks.cpp with dedicated leak regression tests
- Test model/context/sampler lifecycle patterns
- Test error condition cleanup
- Test concurrent usage with multiple threads
- Test batch operations and KV cache clearing
- Add optional Valgrind integration via CMake target

Tests cover:
- Repeated model load/free cycles (10 iterations)
- Context creation/destruction patterns (10 iterations)
- Multiple contexts sharing the same model (5 contexts)
- Sampler lifecycle with chain operations
- Backend initialization cleanup
- Error path cleanup validation (failed model load)
- Concurrent model usage (3 threads)
- Batch allocation/deallocation
- KV cache memory clearing operations
- Model load cancellation (via progress callback)

The test follows existing patterns from test-autorelease.cpp and test-thread-safety.cpp, using the get-model.cpp helper for model paths and the proper cleanup order (sampler → context → model → backend), as sketched below.

Run with: cmake -DLLAMA_SANITIZE_ADDRESS=ON && ctest -R test-memory-leaks
Optional: make test-valgrind (requires Valgrind installed)

Related to the disabled test-opt.cpp, which has a known memory leak at line 300 (ggml_opt_alloc called in a loop without cleanup between iterations).

Co-Authored-By: Stephen Cornwell <[email protected]>
1 parent 661ae31 commit d54e7d8
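For reference, the create/teardown ordering that every test in this commit follows can be condensed to a few lines. This is a sketch assembled from the test bodies further down, not additional code from the commit; it uses only llama.h calls that appear in the diff:

    // lifecycle pattern used throughout test-memory-leaks.cpp:
    // create in order backend -> model -> context -> sampler,
    // then free in the reverse order.
    llama_backend_init();
    auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
    auto * ctx   = llama_init_from_model(model, llama_context_default_params());
    auto * smpl  = llama_sampler_chain_init(llama_sampler_chain_default_params());

    llama_sampler_free(smpl);   // 1. sampler first
    llama_free(ctx);            // 2. then context
    llama_model_free(model);    // 3. then model
    llama_backend_free();       // 4. backend last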

File tree: 2 files changed (+374, −0)


tests/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
@@ -201,6 +201,7 @@ llama_build_and_test(test-backend-ops.cpp)
 
 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")
+llama_build_and_test(test-memory-leaks.cpp LABEL "model")
 
 if (NOT GGML_BACKEND_DL)
     # these tests use the backends directly and cannot be built with dynamic loading
@@ -219,3 +220,23 @@ target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd)
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
 add_executable(${TEST_TARGET} test-c.c)
 target_link_libraries(${TEST_TARGET} PRIVATE llama)
+
+# Optional Valgrind target for memory leak checking
+find_program(VALGRIND_EXECUTABLE valgrind)
+if(VALGRIND_EXECUTABLE)
+    add_custom_target(test-valgrind
+        COMMAND ${VALGRIND_EXECUTABLE}
+            --leak-check=full
+            --show-leak-kinds=all
+            --track-origins=yes
+            --error-exitcode=1
+            ${CMAKE_CURRENT_BINARY_DIR}/test-memory-leaks
+        DEPENDS test-memory-leaks
+        COMMENT "Running memory leak tests with Valgrind"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    )
+    message(STATUS "Valgrind found: ${VALGRIND_EXECUTABLE}")
+    message(STATUS "Run 'make test-valgrind' to check for memory leaks with Valgrind")
+else()
+    message(STATUS "Valgrind not found - install it for additional leak checking")
+endif()
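As a usage note (not part of the diff): test-valgrind is a custom target, so it is driven through the build tool — `make test-valgrind` with the Makefile generator, as the commit message says, or equivalently `cmake --build . --target test-valgrind` with any generator. The AddressSanitizer route from the commit message expands to roughly the following (any other configure flags are whatever the local build already uses):

    # configure with ASan enabled, build, then run only the new test
    cmake -B build -DLLAMA_SANITIZE_ADDRESS=ON
    cmake --build build
    cd build && ctest -R test-memory-leaks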

tests/test-memory-leaks.cpp

Lines changed: 353 additions & 0 deletions
New file (353 additions):

//
//

#include "llama.h"
#include "get-model.h"

#include <cstdio>
#include <cstring>
#include <thread>
#include <vector>
#include <atomic>

static void test_model_load_unload_cycles(const char * model_path) {
    fprintf(stderr, "test_model_load_unload_cycles: ");

    for (int i = 0; i < 10; i++) {
        llama_backend_init();

        auto params = llama_model_default_params();
        auto * model = llama_model_load_from_file(model_path, params);
        if (model == nullptr) {
            fprintf(stderr, "FAILED (model load failed on iteration %d)\n", i);
            return;
        }

        llama_model_free(model);
        llama_backend_free();
    }

    fprintf(stderr, "OK\n");
}

static void test_context_lifecycle(const char * model_path) {
    fprintf(stderr, "test_context_lifecycle: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    for (int i = 0; i < 10; i++) {
        auto ctx_params = llama_context_default_params();
        ctx_params.n_ctx = 512;

        auto * ctx = llama_init_from_model(model, ctx_params);
        if (ctx == nullptr) {
            fprintf(stderr, "FAILED (context creation failed on iteration %d)\n", i);
            llama_model_free(model);
            llama_backend_free();
            return;
        }

        llama_free(ctx);
    }

    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_multiple_contexts_same_model(const char * model_path) {
    fprintf(stderr, "test_multiple_contexts_same_model: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    const int num_contexts = 5;
    std::vector<llama_context *> contexts(num_contexts);

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;

    for (int i = 0; i < num_contexts; i++) {
        contexts[i] = llama_init_from_model(model, ctx_params);
        if (contexts[i] == nullptr) {
            fprintf(stderr, "FAILED (context %d creation failed)\n", i);
            for (int j = 0; j < i; j++) {
                llama_free(contexts[j]);
            }
            llama_model_free(model);
            llama_backend_free();
            return;
        }
    }

    for (auto * ctx : contexts) {
        llama_free(ctx);
    }

    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_sampler_lifecycle(const char * model_path) {
    fprintf(stderr, "test_sampler_lifecycle: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    for (int i = 0; i < 10; i++) {
        auto sparams = llama_sampler_chain_default_params();
        auto * smpl = llama_sampler_chain_init(sparams);
        if (smpl == nullptr) {
            fprintf(stderr, "FAILED (sampler creation failed on iteration %d)\n", i);
            llama_free(ctx);
            llama_model_free(model);
            llama_backend_free();
            return;
        }

        llama_sampler_chain_add(smpl, llama_sampler_init_greedy());
        llama_sampler_free(smpl);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_error_condition_cleanup(const char * /* model_path */) {
    fprintf(stderr, "test_error_condition_cleanup: ");

    llama_backend_init();

    auto params = llama_model_default_params();
    auto * model = llama_model_load_from_file("/nonexistent/path/to/model.gguf", params);
    if (model != nullptr) {
        fprintf(stderr, "FAILED (expected nullptr for nonexistent model)\n");
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_model_load_cancel(const char * model_path) {
    fprintf(stderr, "test_model_load_cancel: ");

    llama_backend_init();

    auto params = llama_model_default_params();
    params.use_mmap = false;
    params.progress_callback = [](float progress, void * ctx) {
        (void) ctx;
        return progress > 0.50f;
    };

    auto * model = llama_model_load_from_file(model_path, params);

    if (model != nullptr) {
        llama_model_free(model);
    }

    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_batch_operations(const char * model_path) {
    fprintf(stderr, "test_batch_operations: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    for (int i = 0; i < 10; i++) {
        llama_batch batch = llama_batch_init(32, 0, 1);

        llama_batch_free(batch);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

static void test_backend_init_free_cycles() {
    fprintf(stderr, "test_backend_init_free_cycles: ");

    for (int i = 0; i < 10; i++) {
        llama_backend_init();
        llama_backend_free();
    }

    fprintf(stderr, "OK\n");
}

static void test_threaded_contexts(const char * model_path) {
    fprintf(stderr, "test_threaded_contexts: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    std::atomic<bool> failed = false;
    std::vector<std::thread> threads;
    const int num_threads = 3;

    for (int t = 0; t < num_threads; t++) {
        threads.emplace_back([&, t, model]() {
            auto ctx_params = llama_context_default_params();
            ctx_params.n_ctx = 512;

            auto * ctx = llama_init_from_model(model, ctx_params);
            if (ctx == nullptr) {
                failed.store(true);
                return;
            }

            auto sparams = llama_sampler_chain_default_params();
            auto * smpl = llama_sampler_chain_init(sparams);
            if (smpl == nullptr) {
                llama_free(ctx);
                failed.store(true);
                return;
            }

            llama_sampler_chain_add(smpl, llama_sampler_init_greedy());

            llama_sampler_free(smpl);
            llama_free(ctx);
        });
    }

    for (auto & thread : threads) {
        thread.join();
    }

    llama_model_free(model);
    llama_backend_free();

    if (failed) {
        fprintf(stderr, "FAILED (thread error)\n");
    } else {
        fprintf(stderr, "OK\n");
    }
}

static void test_kv_cache_clear_operations(const char * model_path) {
    fprintf(stderr, "test_kv_cache_clear_operations: ");

    llama_backend_init();

    auto model_params = llama_model_default_params();
    auto * model = llama_model_load_from_file(model_path, model_params);
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    for (int i = 0; i < 10; i++) {
        llama_memory_t mem = llama_get_memory(ctx);
        llama_memory_clear(mem, false);
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();

    fprintf(stderr, "OK\n");
}

int main(int argc, char ** argv) {
    auto * model_path = get_model_or_exit(argc, argv);

    fprintf(stderr, "Running memory leak regression tests...\n\n");

    test_backend_init_free_cycles();
    test_model_load_unload_cycles(model_path);
    test_context_lifecycle(model_path);
    test_multiple_contexts_same_model(model_path);
    test_sampler_lifecycle(model_path);
    test_batch_operations(model_path);
    test_kv_cache_clear_operations(model_path);
    test_threaded_contexts(model_path);
    test_model_load_cancel(model_path);
    test_error_condition_cleanup(model_path);

    fprintf(stderr, "\nAll memory leak tests completed successfully!\n");

    return 0;
}
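One design point worth noting: none of these tests measure memory themselves — they only drive allocation/free paths and print OK, and the actual leak detection comes from running the binary under AddressSanitizer or the Valgrind target, both of which fail at process exit if anything was leaked. A new lifecycle test therefore only needs to follow the same create/teardown shape. As an illustration, a state-serialization variant might look like the sketch below; this is not part of the commit, and it assumes the current llama.h state API (llama_state_get_size, and llama_state_get_data taking a destination size):

// hypothetical extension following the same pattern (sketch only):
// exercise the state-buffer serialization path, which must also be leak-free
static void test_state_buffer_cycles(const char * model_path) {
    fprintf(stderr, "test_state_buffer_cycles: ");

    llama_backend_init();

    auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
    if (model == nullptr) {
        fprintf(stderr, "FAILED (model load failed)\n");
        llama_backend_free();
        return;
    }

    auto ctx_params = llama_context_default_params();
    ctx_params.n_ctx = 512;
    auto * ctx = llama_init_from_model(model, ctx_params);
    if (ctx == nullptr) {
        fprintf(stderr, "FAILED (context creation failed)\n");
        llama_model_free(model);
        llama_backend_free();
        return;
    }

    for (int i = 0; i < 10; i++) {
        const size_t n = llama_state_get_size(ctx);        // size of the serialized state
        std::vector<uint8_t> buf(n);                       // freed automatically each iteration
        llama_state_get_data(ctx, buf.data(), buf.size()); // serialize into the buffer
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();   // same teardown order as the other tests

    fprintf(stderr, "OK\n");
}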
