Commit 52ed642

[aux] Automatic memory loading tests
Add automatic tests that load a model from memory, either from a single buffer or from multiple asynchronously fulfilled splits.
1 parent bde85cd commit 52ed642

3 files changed: +125 -2 lines
tests/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
@@ -8,7 +8,7 @@ function(llama_build source)
     endif()

     add_executable(${TEST_TARGET} ${source})
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
+    target_link_libraries(${TEST_TARGET} PRIVATE common llama-common-test)
     install(TARGETS ${TEST_TARGET} RUNTIME)
 endfunction()

@@ -97,7 +97,7 @@ function(llama_build_and_test source)

     add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
+    target_link_libraries(${TEST_TARGET} PRIVATE common llama-common-test)

     add_test(
         NAME ${TEST_TARGET}
@@ -198,6 +198,8 @@ llama_build_and_test(test-backend-ops.cpp)

 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-model-load-disk.cpp LABEL "model")
+llama_build_and_test(test-model-load-memory.cpp LABEL "model")
+llama_build_and_test(test-model-load-memory-split.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")

 if (NOT GGML_BACKEND_DL)
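
Note: both new binaries are registered under the existing "model" label and resolve their input through get_model_or_exit() from get-model.cpp (see the tests below), so they presumably follow the same opt-in convention as the other model-labelled tests and exit early when no test model is supplied.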
tests/test-model-load-memory-split.cpp

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+#include <cstdlib>
+#include <thread>
+#include <vector>
+
+#include "get-model.h"
+#include "llama-cpp.h"
+#include "load_into_memory.h"
+
+int main(int argc, char * argv[]) {
+    auto * model_path = get_model_or_exit(argc, argv);
+
+    if (!is_split_file(model_path)) {
+        printf("Skipping not-split model %s\n", model_path);
+        return EXIT_SUCCESS;
+    }
+
+    // Manually load into a memory buffer first
+    file_entry tensor_list_file = load_tensor_list_file(model_path);
+    std::vector<file_entry> files = load_files_into_streambuf(model_path);
+
+    llama_backend_init();
+    auto params = llama_model_params{};
+    params.use_mmap = false;
+    params.progress_callback = [](float progress, void * ctx) {
+        (void) ctx;
+        fprintf(stderr, "%.2f%% ", progress * 100.0f);
+        // true means: Don't cancel the load
+        return true;
+    };
+
+    printf("Loading model from %zu files\n", files.size());
+
+    std::vector<const char *> file_paths;
+    for (size_t i = 0; i < files.size(); i++) {
+        printf("Found file %s \n", files[i].path.c_str());
+        file_paths.push_back(files[i].path.c_str());
+    }
+
+    const char * async_load_context = "test-model-load";
+    std::thread fulfill_thread([&files, &tensor_list_file, &async_load_context]() {
+        const bool success = llama_model_load_fulfill_split_future(tensor_list_file.path.c_str(), async_load_context,
+                                                                   std::move(tensor_list_file.streambuf));
+        printf("Fulfilling tensor list file %s: %s\n", tensor_list_file.path.c_str(), success ? "success" : "failure");
+        if (!success) {
+            exit(EXIT_FAILURE);
+        }
+        for (size_t i = 0; i < files.size(); i++) {
+            const bool success = llama_model_load_fulfill_split_future(files[i].path.c_str(), async_load_context,
+                                                                       std::move(files[i].streambuf));
+            printf("Fulfilling file %s: %s\n", files[i].path.c_str(), success ? "success" : "failure");
+            if (!success) {
+                exit(EXIT_FAILURE);
+            }
+        }
+    });
+    fprintf(stderr, "Loading model from splits\n");
+    auto * model = llama_model_load_from_split_futures(file_paths.data(), file_paths.size(), async_load_context,
+                                                       tensor_list_file.path.c_str(), params);
+    fulfill_thread.join();
+
+    fprintf(stderr, "\n");
+
+    if (model == nullptr) {
+        fprintf(stderr, "Failed to load model\n");
+        llama_backend_free();
+        return EXIT_FAILURE;
+    }
+
+    fprintf(stderr, "Model loaded successfully\n");
+    llama_model_free(model);
+    llama_backend_free();
+
+    return EXIT_SUCCESS;
+}
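
A note on the shape of this test: the split futures are fulfilled from a separate thread because llama_model_load_from_split_futures presumably blocks until every registered split (including the tensor-list file) has been supplied; starting the fulfill thread before the blocking call and joining it afterwards is what exercises the asynchronous path.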

tests/test-model-load-memory.cpp

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+#include "get-model.h"
+#include "llama-cpp.h"
+#include "load_into_memory.h"
+
+int main(int argc, char * argv[]) {
+    auto * model_path = get_model_or_exit(argc, argv);
+
+    if (is_split_file(model_path)) {
+        printf("Skipping split model %s\n", model_path);
+        return EXIT_SUCCESS;
+    }
+
+    // Manually load into a memory buffer first
+    std::vector<std::uint8_t> buffer = load_file_into_buffer(model_path);
+
+    llama_backend_init();
+    auto params = llama_model_params{};
+    params.use_mmap = false;
+    params.progress_callback = [](float progress, void * ctx) {
+        (void) ctx;
+        fprintf(stderr, "%.2f%% ", progress * 100.0f);
+        // true means: Don't cancel the load
+        return true;
+    };
+
+    // Test that it can load directly from a buffer
+    printf("Loading model from buffer of size %zu bytes\n", buffer.size());
+    auto * model = llama_model_load_from_buffer(std::move(buffer), params);
+
+    // Add newline after progress output
+    fprintf(stderr, "\n");
+
+    if (model == nullptr) {
+        fprintf(stderr, "Failed to load model\n");
+        llama_backend_free();
+        return EXIT_FAILURE;
+    }
+
+    fprintf(stderr, "Model loaded successfully\n");
+    llama_model_free(model);
+    llama_backend_free();
+    return EXIT_SUCCESS;
+}
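
For orientation: both tests include the load_into_memory.h helper, which is not part of this diff. The following is a minimal sketch of the interface the tests appear to rely on, reconstructed from the call sites above; the member types (in particular the streambuf field) and exact signatures are assumptions, not the committed header.

// load_into_memory.h -- hypothetical sketch inferred from the tests above;
// the real header added elsewhere in this change may differ.
#pragma once

#include <cstdint>
#include <memory>
#include <streambuf>
#include <string>
#include <vector>

// One loaded file: its original path plus its contents exposed as a
// stream buffer that can be moved into the split-future API.
struct file_entry {
    std::string                     path;
    std::unique_ptr<std::streambuf> streambuf;  // assumed type
};

// True if the path names one split of a multi-part model.
bool is_split_file(const char * model_path);

// Read a single-file model fully into memory.
std::vector<std::uint8_t> load_file_into_buffer(const char * model_path);

// Load the split that carries the tensor list / metadata.
file_entry load_tensor_list_file(const char * model_path);

// Load every split of the model into memory.
std::vector<file_entry> load_files_into_streambuf(const char * model_path);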
