77#include < cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
88#include < sheredom/subprocess.h>
99
10+ #include < cstdio>
1011#include < functional>
1112#include < algorithm>
1213#include < thread>
@@ -77,57 +78,87 @@ static std::filesystem::path get_server_exec_path() {
7778
// A GGUF model discovered on the local filesystem (under --models-dir).
struct local_model {
    std::string name;         // hierarchical name relative to the models dir, may contain '/'
    std::string display_name; // `name` with path separators replaced by '_' (flat identifier)
    std::string path;         // path to the model weights (first shard for split models)
    std::string path_mmproj;  // path to the multimodal projector file; empty if none
};
8385
// Make a model name safe to use as a flat identifier by replacing
// path separators ('/' and '\') with underscores.
static std::string sanitize_model_name(const std::string & name) {
    std::string out = name;
    std::replace(out.begin(), out.end(), '/',  '_');
    std::replace(out.begin(), out.end(), '\\', '_');
    return out;
}
92+
8493static std::vector<local_model> list_local_models (const std::string & dir) {
8594 if (!std::filesystem::exists (dir) || !std::filesystem::is_directory (dir)) {
8695 throw std::runtime_error (string_format (" error: '%s' does not exist or is not a directory\n " , dir.c_str ()));
8796 }
8897
8998 std::vector<local_model> models;
90- auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
91- auto files = fs_list (subdir_path, false );
92- common_file_info model_file;
93- common_file_info first_shard_file;
94- common_file_info mmproj_file;
95- for (const auto & file : files) {
96- if (string_ends_with (file.name , " .gguf" )) {
97- if (file.name .find (" mmproj" ) != std::string::npos) {
98- mmproj_file = file;
99- } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
100- first_shard_file = file;
101- } else {
102- model_file = file;
99+ std::function<void (const std::string &, const std::string &)> scan_subdir =
100+ [&](const std::string & subdir_path, const std::string & name) {
101+ auto files = fs_list (subdir_path, true ); // Need directories for recursion
102+ common_file_info model_file;
103+ common_file_info first_shard_file;
104+ common_file_info mmproj_file;
105+
106+ for (const auto & file : files) {
107+ if (file.is_dir ) {
108+ const std::string child_name = name.empty () ? file.name : name + " /" + file.name ;
109+ scan_subdir (file.path , child_name);
110+ continue ;
111+ }
112+
113+ if (string_ends_with (file.name , " .gguf" )) {
114+ if (file.name .find (" mmproj" ) != std::string::npos) {
115+ mmproj_file = file;
116+ } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
117+ first_shard_file = file;
118+ } else {
119+ model_file = file;
120+ }
121+ }
122+ }
123+
124+ // Convert absolute paths to relative
125+ std::string model_path = first_shard_file.path .empty () ? model_file.path : first_shard_file.path ;
126+ if (!model_path.empty ()) {
127+ std::error_code ec;
128+ auto rel_path = std::filesystem::relative (model_path, dir, ec);
129+ if (!ec) {
130+ model_path = rel_path.generic_string ();
131+ }
132+ }
133+
134+ std::string mmproj_path = mmproj_file.path ;
135+ if (!mmproj_path.empty ()) {
136+ std::error_code ec;
137+ auto rel_path = std::filesystem::relative (mmproj_path, dir, ec);
138+ if (!ec) {
139+ mmproj_path = rel_path.generic_string ();
103140 }
104141 }
105- }
106- // single file model
107- local_model model{
108- /* name */ name,
109- /* path */ first_shard_file.path .empty () ? model_file.path : first_shard_file.path ,
110- /* path_mmproj */ mmproj_file.path // can be empty
111- };
112- if (!model.path .empty ()) {
113- models.push_back (model);
114- }
115- };
116142
117- auto files = fs_list (dir, true );
118- for (const auto & file : files) {
119- if (file.is_dir ) {
120- scan_subdir (file.path , file.name );
121- } else if (string_ends_with (file.name , " .gguf" )) {
122- // single file model
123- std::string name = file.name ;
124- string_replace_all (name, " .gguf" , " " );
125143 local_model model{
126- /* name */ name,
127- /* path */ file.path ,
128- /* path_mmproj */ " "
144+ /* name */ name,
145+ /* display_name */ sanitize_model_name (name),
146+ /* path */ model_path,
147+ /* path_mmproj */ mmproj_path // can be empty
129148 };
130- models.push_back (model);
149+ if (!model.path .empty ()) {
150+ models.push_back (model);
151+ }
152+ };
153+
154+ scan_subdir (dir, " " );
155+
156+ // when scanning the root, the name is empty, so adjust names for models directly under models_dir
157+ for (auto & model : models) {
158+ if (model.name .empty () && !model.path .empty ()) {
159+ model.name = std::filesystem::path (model.path ).filename ().string ();
160+ string_replace_all (model.name , " .gguf" , " " );
161+ model.display_name = sanitize_model_name (model.name );
131162 }
132163 }
133164 return models;
@@ -138,8 +169,8 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
138169//
139170
140171
141- server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142- : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
172+ server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir )
173+ : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)), models_dir(models_dir) {
143174 if (!presets_path.empty ()) {
144175 presets = common_presets_load (presets_path, ctx_params);
145176 SRV_INF (" Loaded %zu presets from %s\n " , presets.size (), presets_path.c_str ());
@@ -156,6 +187,7 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
156187 if (env == " LLAMA_ARG_PORT" ||
157188 env == " LLAMA_ARG_HOST" ||
158189 env == " LLAMA_ARG_ALIAS" ||
190+ env == " LLAMA_ARG_MODELS_PRESET" ||
159191 env == " LLAMA_ARG_API_KEY" ||
160192 env == " LLAMA_ARG_MODELS_DIR" ||
161193 env == " LLAMA_ARG_MODELS_MAX" ||
@@ -200,9 +232,17 @@ void server_presets::render_args(server_model_meta & meta) {
200232 if (meta.in_cache ) {
201233 preset.options [control_args[" LLAMA_ARG_HF_REPO" ]] = meta.name ;
202234 } else {
203- preset.options [control_args[" LLAMA_ARG_MODEL" ]] = meta.path ;
235+ std::string model_path = meta.path ;
236+ if (!models_dir.empty () && !std::filesystem::path (model_path).is_absolute ()) {
237+ model_path = models_dir + " /" + model_path;
238+ }
239+ preset.options [control_args[" LLAMA_ARG_MODEL" ]] = model_path;
204240 if (!meta.path_mmproj .empty ()) {
205- preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = meta.path_mmproj ;
241+ std::string mmproj_path = meta.path_mmproj ;
242+ if (!models_dir.empty () && !std::filesystem::path (mmproj_path).is_absolute ()) {
243+ mmproj_path = models_dir + " /" + mmproj_path;
244+ }
245+ preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = mmproj_path;
206246 }
207247 }
208248 meta.args = preset.to_args ();
@@ -216,20 +256,21 @@ server_models::server_models(
216256 const common_params & params,
217257 int argc,
218258 char ** argv,
219- char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
259+ char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset, params.models_dir ) {
220260 for (int i = 0 ; i < argc; i++) {
221261 base_args.push_back (std::string (argv[i]));
222262 }
223263 for (char ** env = envp; *env != nullptr ; env++) {
224264 base_env.push_back (std::string (*env));
225265 }
226266 GGML_ASSERT (!base_args.empty ());
227- // set binary path
267+ // Save binary path before base_args is modified by presets parsing
228268 try {
229- base_args[ 0 ] = get_server_exec_path ().string ();
269+ server_binary_path = get_server_exec_path ().string ();
230270 } catch (const std::exception & e) {
231271 LOG_WRN (" failed to get server executable path: %s\n " , e.what ());
232- LOG_WRN (" using original argv[0] as fallback: %s\n " , base_args[0 ].c_str ());
272+ LOG_WRN (" using original argv[0] as fallback: %s\n " , argv[0 ]);
273+ server_binary_path = std::string (argv[0 ]);
233274 }
234275 // TODO: allow refreshing cached model list
235276 // add cached models
@@ -258,13 +299,18 @@ server_models::server_models(
258299 if (!params.models_dir .empty ()) {
259300 auto local_models = list_local_models (params.models_dir );
260301 for (const auto & model : local_models) {
261- if (mapping.find (model.name ) != mapping.end ()) {
302+ const std::string name = model.display_name ;
303+ if (mapping.find (name) != mapping.end ()) {
262304 // already exists in cached models, skip
263305 continue ;
264306 }
307+ auto preset = presets.get_preset (name);
308+ if (preset.name .empty () && name != model.name ) {
309+ preset = presets.get_preset (model.name );
310+ }
265311 server_model_meta meta{
266- /* preset */ presets. get_preset (model. name ) ,
267- /* name */ model. name ,
312+ /* preset */ preset ,
313+ /* name */ name,
268314 /* path */ model.path ,
269315 /* path_mmproj */ model.path_mmproj ,
270316 /* in_cache */ false ,
@@ -445,11 +491,15 @@ void server_models::load(const std::string & name) {
445491 throw std::runtime_error (" failed to get a port number" );
446492 }
447493
494+ presets.render_args (inst.meta );
495+
448496 inst.subproc = std::make_shared<subprocess_s>();
449497 {
450498 SRV_INF (" spawning server instance with name=%s on port %d\n " , inst.meta .name .c_str (), inst.meta .port );
451499
452500 std::vector<std::string> child_args = inst.meta .args ; // copy
501+ // Insert binary path as argv[0]
502+ child_args.insert (child_args.begin (), server_binary_path);
453503 std::vector<std::string> child_env = base_env; // copy
454504 child_env.push_back (" LLAMA_SERVER_ROUTER_PORT=" + std::to_string (base_params.port ));
455505
0 commit comments