Skip to content

Commit 28751d1

Browse files
committed
[aux] Memory example (embedding)
Adapt the embedding example to showcase how to load a model from memory. This can be configured through environment variables.
1 parent 1f820b0 commit 28751d1

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

examples/embedding/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set(TARGET llama-embedding)
22
add_executable(${TARGET} embedding.cpp)
33
install(TARGETS ${TARGET} RUNTIME)
4-
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
4+
target_link_libraries(${TARGET} PRIVATE common llama llama-common-test ${CMAKE_THREAD_LIBS_INIT})
55
target_compile_features(${TARGET} PRIVATE cxx_std_17)

examples/embedding/embedding.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
1+
#include <algorithm>
2+
#include <chrono>
3+
#include <cstdint>
4+
#include <cstdlib>
5+
#include <ctime>
6+
#include <fstream>
7+
#include <thread>
8+
#include <vector>
9+
110
#include "arg.h"
211
#include "common.h"
12+
#include "llama-cpp.h"
313
#include "log.h"
4-
#include "llama.h"
5-
6-
#include <ctime>
7-
#include <algorithm>
814

915
#if defined(_MSC_VER)
1016
#pragma warning(disable: 4244 4267) // possible loss of data
1117
#endif
1218

19+
#include "load_into_memory.h"
20+
1321
static std::vector<std::string> split_lines(const std::string & s, const std::string & separator = "\n") {
1422
std::vector<std::string> lines;
1523
size_t start = 0;
@@ -94,7 +102,15 @@ int main(int argc, char ** argv) {
94102
llama_numa_init(params.numa);
95103

96104
// load the model
97-
common_init_result llama_init = common_init_from_params(params);
105+
common_init_result llama_init;
106+
if (memory_configuration_env_is_set()) {
107+
llama_model_params mparams = common_model_params_to_llama(params);
108+
common_init_result iparams;
109+
llama_model * model = load_model_from_memory_configuration(params.model.path.c_str(), mparams);
110+
llama_init = common_init_from_model_and_params(model, std::move(iparams), params);
111+
} else {
112+
llama_init = common_init_from_params(params);
113+
}
98114

99115
llama_model * model = llama_init.model.get();
100116
llama_context * ctx = llama_init.context.get();

0 commit comments

Comments (0)