2 files changed: +8 −9 lines

CMakeLists.txt (llama-simple example):
 set(TARGET llama-simple)
 add_executable(${TARGET} simple.cpp)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE llama llama-common-test ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
simple.cpp:

-#include "llama.h"
+#include "llama-cpp.h"
 
 #include <cstdio>
 #include <cstring>
 #include <string>
-#include <vector>
 
 static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n    %s -m model.gguf [-n n_predict] [-ngl n_gpu_layers] [prompt]\n", argv[0]);
+    printf("\nOptional environment variables: LLAMA_EXAMPLE_MEMORY_BUFFER LLAMA_EXAMPLE_MEMORY_BUFFER_SPLIT");
     printf("\n");
 }
 
+#include "load_into_memory.h"
+
 int main(int argc, char ** argv) {
     // path to the model gguf file
     std::string model_path;

@@ -83,12 +85,9 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;
 
-    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
-
-    if (model == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
+    llama_model * model = memory_configuration_env_is_set() ?
+        load_model_from_memory_configuration(model_path.c_str(), model_params) :
+        llama_model_load_from_file(model_path.c_str(), model_params);
 
     const llama_vocab * vocab = llama_model_get_vocab(model);
     // tokenize the prompt
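
For reference, below is a minimal sketch of what the two helpers called above might look like. The real load_into_memory.h (presumably provided by the new llama-common-test link dependency) is not part of this diff; the names memory_configuration_env_is_set and load_model_from_memory_configuration are taken only from the call sites in simple.cpp, so their actual signatures, the meaning of the two environment variables, and the buffer-based loader they hand off to may differ.

// Illustrative sketch only -- not the code added by this change.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <vector>

#include "llama.h"

// True when either of the environment variables mentioned in print_usage() is set.
static bool memory_configuration_env_is_set() {
    return std::getenv("LLAMA_EXAMPLE_MEMORY_BUFFER")       != nullptr ||
           std::getenv("LLAMA_EXAMPLE_MEMORY_BUFFER_SPLIT") != nullptr;
}

// Reads the GGUF file into an in-memory buffer before loading. How that buffer
// is handed to llama.cpp depends on the loader API this change introduces,
// which is not reproduced here; the sketch falls back to the path-based loader
// so it stays self-contained.
static llama_model * load_model_from_memory_configuration(const char * path, llama_model_params params) {
    std::ifstream file(path, std::ios::binary);
    if (!file) {
        std::fprintf(stderr, "error: unable to open %s\n", path);
        return nullptr;
    }
    std::vector<char> buffer((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
    std::fprintf(stderr, "read %zu bytes of model data into memory\n", buffer.size());
    // ... pass `buffer` to the buffer-aware loader provided by load_into_memory.h ...
    return llama_model_load_from_file(path, params);
}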