Commit 71c63a4
[aux] Automatic streams memory-loading test
Add automatic tests that load a model from memory via multiple asynchronously fulfilled splits
diff --git a/ggml/include/gguf.h b/ggml/include/gguf.h
index 3471d4b..377fc60de 100644
--- a/ggml/include/gguf.h
+++ b/ggml/include/gguf.h
@@ -200,7 +200,7 @@ extern "C" {
 }
 #endif

-#if defined(__cplusplus) && __cplusplus >= 201703L
+#if defined(__cplusplus)
 #include <ios>
 GGML_API struct gguf_context * gguf_init_from_buffer(std::basic_streambuf<char>& streambuf, struct gguf_init_params params);
 #endif
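With the C++17 guard dropped, any C++ translation unit can now call gguf_init_from_buffer. A minimal sketch of driving it from an in-memory copy of a GGUF file (the file name and error handling are illustrative; the gguf_init_params fields follow the existing gguf.h API):

    #include <fstream>
    #include <sstream>

    #include "gguf.h"

    int main() {
        // read the whole file into an in-memory buffer
        std::ifstream file("model.gguf", std::ios::binary);
        std::stringstream buffer;
        buffer << file.rdbuf();

        // parse the GGUF metadata straight from the streambuf;
        // no_alloc = true keeps this to metadata only
        struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
        struct gguf_context * gguf = gguf_init_from_buffer(*buffer.rdbuf(), params);
        if (gguf == nullptr) {
            return 1;
        }
        gguf_free(gguf);
        return 0;
    }

Taking a std::basic_streambuf<char>& rather than a raw pointer/size pair lets callers hand over any stream-backed source (stringbuf, filebuf, a custom network-backed buffer) without first flattening it into a contiguous allocation.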
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index f1e5c22..87127a1 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -202,6 +202,7 @@ llama_build_and_test(test-backend-ops.cpp)
 llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_build_and_test(test-model-load-disk.cpp LABEL "model")
 llama_build_and_test(test-model-load-memory.cpp LABEL "model")
+llama_build_and_test(test-model-load-memory-split.cpp LABEL "model")
 llama_build_and_test(test-autorelease.cpp LABEL "model")

 if (NOT GGML_BACKEND_DL)
diff --git a/tests/test-model-load-memory-split.cpp b/tests/test-model-load-memory-split.cpp
new file mode 100644
index 000000000..2d3dd21
--- /dev/null
+++ b/tests/test-model-load-memory-split.cpp
@@ -0,0 +1,76 @@
+#include "get-model.h"
+#include "llama-cpp.h"
+#include "load-into-memory.h"
+
+#include <cstdlib>
+#include <thread>
+#include <vector>
+
+using namespace common_load_into_memory;
+
+int main(int argc, char * argv[]) {
+ auto * model_path = get_model_or_exit(argc, argv);
+
+ if (!is_split_file(model_path)) {
+ printf("Skipping not-split model %s\n", model_path);
+ return EXIT_SUCCESS;
+ }
+
+ // Manually load into a memory buffer first
+ llama_file_entry tensor_list_file = load_tensor_list_file(model_path);
+ std::vector<llama_file_entry> files = load_files_into_streambuf(model_path);
+
+ llama_backend_init();
+ auto params = llama_model_params{};
+ params.use_mmap = false;
+ params.progress_callback = [](float progress, void * ctx) {
+ (void) ctx;
+ fprintf(stderr, "%.2f%% ", progress * 100.0f);
+ // true means: Don't cancel the load
+ return true;
+ };
+
+ printf("Loading model from %zu files\n", files.size());
+
+ std::vector<const char *> file_paths;
+ for (size_t i = 0; i < files.size(); i++) {
+ printf("Found file %s \n", files[i].path.c_str());
+ file_paths.push_back(files[i].path.c_str());
+ }
+
+ const char * async_load_context = "test-model-load";
+ std::thread fulfill_thread([&files, &tensor_list_file, &async_load_context]() {
+ const bool success = llama_model_load_fulfill_split_future(tensor_list_file.path.c_str(), async_load_context,
+ std::move(tensor_list_file.streambuf));
+ printf("Fulfilling tensor list file %s: %s\n", tensor_list_file.path.c_str(), success ? "success" : "failure");
+ if (!success) {
+ exit(EXIT_FAILURE);
+ }
+ for (size_t i = 0; i < files.size(); i++) {
+ const bool success = llama_model_load_fulfill_split_future(files[i].path.c_str(), async_load_context,
+ std::move(files[i].streambuf));
+ printf("Fulfilling file %s: %s\n", files[i].path.c_str(), success ? "success" : "failure");
+ if (!success) {
+ exit(EXIT_FAILURE);
+ }
+ }
+ });
+ fprintf(stderr, "Loading model from splits\n");
+ auto * model = llama_model_load_from_split_futures(file_paths.data(), file_paths.size(), async_load_context,
+ tensor_list_file.path.c_str(), params);
+ fulfill_thread.join();
+
+ fprintf(stderr, "\n");
+
+ if (model == nullptr) {
+ fprintf(stderr, "Failed to load model\n");
+ llama_backend_free();
+ return EXIT_FAILURE;
+ }
+
+ fprintf(stderr, "Model loaded successfully\n");
+ llama_model_free(model);
+ llama_backend_free();
+
+ return EXIT_SUCCESS;
+}1 parent a726b69 commit 71c63a4
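The helpers the test pulls in from load-into-memory.h are not part of this diff. A hypothetical sketch of the shape the test appears to assume, with the struct layout and the helper name guessed from the call sites above (llama_file_entry pairing a path with an owning, movable streambuf):

    #include <fstream>
    #include <iterator>
    #include <memory>
    #include <sstream>
    #include <string>

    namespace common_load_into_memory {

    // Pairs a split's path with an owning in-memory buffer; the test
    // std::move()s .streambuf into the fulfill call, so ownership must
    // be transferable.
    struct llama_file_entry {
        std::string path;
        std::unique_ptr<std::basic_streambuf<char>> streambuf;
    };

    // Hypothetical helper: read one file fully into an in-memory stringbuf.
    inline llama_file_entry read_into_entry(const std::string & path) {
        std::ifstream in(path, std::ios::binary);
        auto buf = std::make_unique<std::stringbuf>();
        buf->str(std::string(std::istreambuf_iterator<char>(in), {}));
        return { path, std::move(buf) };
    }

    } // namespace common_load_into_memory

The test's structure mirrors the intended production flow: the loader blocks in llama_model_load_from_split_futures on the main thread while a second thread fulfills each split's future with its in-memory buffer, exercising the asynchronous hand-off rather than a pre-populated cache.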