llama-router: auto-rescan, admin endpoints, and fixes

ServeurpersoCom · ServeurpersoCom · commit dda9dc6fe073 · 2025-12-01T14:44:26.000+01:00
Auto-rescan models on startup:
- Scan cache directory and add new .gguf files as 'auto' models
- Remove 'auto' models no longer present in cache
- Never touch 'manual' models (user-managed configuration)
- Preserve custom spawn/group settings for existing models
- New /admin/rescan endpoint for on-demand rescanning

Separate admin endpoints:
- Extract /admin routes to router-admin.cpp/h
- Clean separation: router-endpoints.cpp = public API only
- Add RouterApp::update_config() for live config updates
- Support both Bearer token and X-Admin-Token header auth

Fixes:
- Fix /model/(health|props|slots) path rewriting for backends
- Thread-safe streaming: eliminate parent scope captures
- Robust JSON parsing for 'stream' field detection
- Simplify signal handlers (remove redundant stop_all)
- Initialize logger before any LOG_* calls
diff --git a/tools/router/CMakeLists.txt b/tools/router/CMakeLists.txt
@@ -4,6 +4,7 @@ add_executable(${TARGET}
     router.cpp
     router-app.cpp
     router-config.cpp
+    router-admin.cpp
     router-endpoints.cpp
     logging.cpp
     router-process.cpp
diff --git a/tools/router/router-admin.cpp b/tools/router/router-admin.cpp
@@ -0,0 +1,62 @@
+#include "router-admin.h"
+
+#include "log.h"
+#include "router-config.h"
+
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+static bool authorize_admin(const RouterConfig & cfg, const httplib::Request & req, httplib::Response & res) {  // Gate for admin routes: returns true if the request may proceed; otherwise fills `res` with a 403 JSON error and returns false.
+    if (cfg.router.admin_token.empty()) {  // No admin token configured: every request is accepted.
+        return true;
+    }
+
+    const std::string bearer = "Bearer " + cfg.router.admin_token;  // Exact Authorization header value that is accepted.
+    const auto        auth   = req.get_header_value("Authorization");
+    const auto        token  = req.get_header_value("X-Admin-Token");
+
+    if (auth == bearer || token == cfg.router.admin_token) {  // Either header alone is sufficient; both are checked with plain string equality.
+        return true;
+    }
+
+    res.status = 403;  // NOTE(review): missing or wrong credentials get 403 rather than 401 - confirm this is intended.
+    res.set_content("{\"error\":\"forbidden\"}", "application/json");
+    LOG_WRN("Admin endpoint rejected unauthorized request from %s:%d\n", req.remote_addr.c_str(), req.remote_port);  // Logs the peer address and port of the rejected request.
+    return false;
+}
+
+void register_admin_routes(httplib::Server & server, RouterApp & app, const std::string & config_path) {  // Registers POST /admin/reload and GET /admin/rescan on `server`; both handlers call authorize_admin() first.
+    server.Post("/admin/reload", [&app](const httplib::Request & req, httplib::Response & res) {  // `app` is captured by reference, so it must outlive the registered handler.
+        if (!authorize_admin(app.get_config(), req, res)) {
+            return;  // authorize_admin() has already written the error response.
+        }
+        LOG_INF("Reloading router application: stopping and auto-starting models\n");
+        app.stop_all();  // Stop first, then start the auto models again.
+        app.start_auto_models();
+        res.set_content("{\"status\":\"reloaded\"}", "application/json");
+    });
+
+    server.Get("/admin/rescan", [&app, config_path](const httplib::Request & req, httplib::Response & res) {  // `config_path` is captured by copy. NOTE(review): this GET route replaces the config and may write a file - confirm GET (not POST) is intended.
+        if (!authorize_admin(app.get_config(), req, res)) {
+            return;
+        }
+
+        const auto rescan_result = rescan_auto_models(app.get_config());  // NOTE(review): get_config() hands out a reference to the live config without taking the mutex that update_config() locks - presumably racy under concurrent requests; confirm.
+        LOG_INF("Admin rescan requested, found %zu new models (removed %zu)\n",
+                rescan_result.added,
+                rescan_result.removed);
+        app.update_config(rescan_result.config);  // Passes a copy: rescan_result is const and update_config() takes its argument by value.
+
+        if (!config_path.empty() && (rescan_result.added > 0 || rescan_result.removed > 0)) {  // Persist only when a path was supplied and the rescan added or removed something.
+            LOG_INF("Persisting updated configuration to %s\n", config_path.c_str());
+            write_config_file(app.get_config(), config_path);  // Runs after update_config(), so the config read back from `app` is what gets written.
+        }
+
+        json out;
+        out["status"]     = "rescanned";
+        out["new_models"] = rescan_result.added;  // Count of models appended by this rescan.
+        out["removed"]    = rescan_result.removed;  // Count of models dropped by this rescan.
+        res.set_content(out.dump(), "application/json");
+    });
+}
diff --git a/tools/router/router-admin.h b/tools/router/router-admin.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "router-app.h"
+
+#include <cpp-httplib/httplib.h>
+
+void register_admin_routes(httplib::Server & server, RouterApp & app, const std::string & config_path = std::string());  // Installs the /admin/* routes; config_path defaults to an empty string.
diff --git a/tools/router/router-app.cpp b/tools/router/router-app.cpp
@@ -148,6 +148,25 @@ std::string RouterApp::get_last_spawned_model() {
     return last_spawned_model;
 }
 
+void RouterApp::update_config(RouterConfig cfg) {  // Replaces the active configuration with `cfg` and refreshes the state derived from it.
+    std::lock_guard<std::mutex> lock(mutex);  // Held until the function returns.
+    config = std::move(cfg);  // `cfg` is taken by value, so it can be moved into place.
+
+    model_lookup.clear();  // Rebuild the lookup from the new model list, keyed by model name.
+    for (const auto & model : config.models) {
+        model_lookup.emplace(model.name, model);
+    }
+
+    if (!model_lookup.count(last_spawned_model)) {  // Forget the last spawned model if the new config no longer contains it.
+        last_spawned_model.clear();
+    }
+
+    const int desired_base = config.router.base_port;
+    if (desired_base > next_port.load()) {  // next_port is only ever raised here, never lowered.
+        next_port.store(desired_base);
+    }
+}
+
 void RouterApp::stop_all() {
     std::lock_guard<std::mutex> lock(mutex);
     for (auto & kv : processes) {
diff --git a/tools/router/router-app.h b/tools/router/router-app.h
@@ -19,6 +19,7 @@ class RouterApp {
     std::string get_last_spawned_model();
     SpawnConfig get_spawn_config(const std::string & model_name);
     void stop_all();
+    void update_config(RouterConfig cfg);
 
     const RouterConfig & get_config() const { return config; }
 
diff --git a/tools/router/router-config.cpp b/tools/router/router-config.cpp
@@ -11,6 +11,8 @@
 #include <fstream>
 #include <stdexcept>
 #include <vector>
+#include <unordered_map>
+#include <unordered_set>
 
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
@@ -208,6 +210,62 @@ RouterConfig generate_default_config(const std::string & path) {
     return cfg;
 }
 
+RescanResult rescan_auto_models(const RouterConfig & existing) {  // Merges scan_default_models() results into a copy of `existing`; returns the merged config with counts of added and removed models.
+    RescanResult result;
+    result.config = existing;  // Start from a copy; `existing` itself is never modified.
+
+    RouterConfig & merged = result.config;
+
+    std::unordered_map<std::string, size_t> existing_paths;  // Expanded model path -> index of that model.
+    for (size_t i = 0; i < existing.models.size(); ++i) {
+        existing_paths.emplace(expand_user_path(existing.models[i].path), i);
+    }
+
+    auto scanned = scan_default_models();
+    std::unordered_set<std::string> scanned_paths;  // Every expanded path seen by this scan; drives the removal pass below.
+    for (auto & scanned_model : scanned) {
+        const auto expanded = expand_user_path(scanned_model.path);
+        scanned_paths.insert(expanded);
+        auto it = existing_paths.find(expanded);
+        if (it != existing_paths.end()) {  // Path already configured: the existing entry is kept untouched.
+            const auto & existing_model = existing.models[it->second];
+            if (existing_model.state == "manual") {  // NOTE(review): this check has no effect - the fall-through below also continues.
+                continue;
+            }
+
+            continue;
+        }
+
+        if (scanned_model.state.empty()) {  // Newly discovered models default to state "auto".
+            scanned_model.state = "auto";
+        }
+        merged.models.push_back(std::move(scanned_model));
+        existing_paths.emplace(expanded, merged.models.size() - 1);  // Register the new path so a repeated scan entry is not appended twice.
+        ++result.added;
+    }
+
+    std::vector<ModelConfig> filtered;  // Second pass: collect the models that survive the rescan.
+    filtered.reserve(merged.models.size());
+    for (const auto & model : merged.models) {
+        if (model.state == "manual") {  // Manual models are always kept, whether or not the scan found them.
+            filtered.push_back(model);
+            continue;
+        }
+
+        const auto expanded = expand_user_path(model.path);
+        const auto found    = scanned_paths.count(expanded) > 0;
+        if (found) {
+            filtered.push_back(model);
+        } else {  // NOTE(review): any state other than "manual" is dropped here, not only "auto" - confirm this is intended.
+            ++result.removed;
+            LOG_INF("Removing auto model (no longer in cache): %s\n", model.name.c_str());
+        }
+    }
+    merged.models = std::move(filtered);
+
+    return result;
+}
+
 RouterConfig load_config(const std::string & path) {
     RouterConfig cfg;
     cfg.router        = get_default_router_options();
@@ -255,6 +313,10 @@ RouterConfig load_config(const std::string & path) {
     }
     LOG_INF("Config parsed: %zu models, router port %d, base port %d\n", cfg.models.size(), cfg.router.port, cfg.router.base_port);
 
+    const auto rescan_result = rescan_auto_models(cfg);
+    cfg                      = rescan_result.config;
+    LOG_INF("Rescanned models, found %zu new auto models (removed %zu)\n", rescan_result.added, rescan_result.removed);
+
     const auto validate_port = [&](int port, const std::string & name) {
         if (port <= 0 || port > 65535) {
             throw std::runtime_error("invalid " + name + " port in config: " + std::to_string(port));
@@ -284,6 +346,11 @@ RouterConfig load_config(const std::string & path) {
         }
     }
 
+    if (rescan_result.added > 0 || rescan_result.removed > 0) {
+        LOG_INF("Persisting updated configuration after rescan (added %zu, removed %zu)\n", rescan_result.added, rescan_result.removed);
+        write_config_file(cfg, path);
+    }
+
     return cfg;
 }
 
diff --git a/tools/router/router-config.h b/tools/router/router-config.h
@@ -37,6 +37,12 @@ struct RouterConfig {
     std::vector<ModelConfig> models;
 };
 
+struct RescanResult {  // Value returned by rescan_auto_models().
+    RouterConfig config;  // Configuration produced by the rescan.
+    size_t       added   = 0;  // Number of models appended by the rescan.
+    size_t       removed = 0;  // Number of models dropped by the rescan.
+};
+
 std::string get_default_config_path();
 std::string expand_user_path(const std::string & path);
 const SpawnConfig & get_default_spawn();
@@ -45,5 +51,6 @@ const RouterOptions &             get_default_router_options();
 RouterConfig load_config(const std::string & path);
 RouterConfig generate_default_config(const std::string & path);
 void         write_config_file(const RouterConfig & cfg, const std::string & path);
+RescanResult rescan_auto_models(const RouterConfig & existing);
 
 std::string get_model_group(const ModelConfig & cfg);
diff --git a/tools/router/router-endpoints.cpp b/tools/router/router-endpoints.cpp
@@ -2,7 +2,6 @@
 
 #include "log.h"
 #include "router-app.h"
-#include "router-config.h"
 #include "router-proxy.h"
 
 #include <nlohmann/json.hpp>
@@ -35,25 +34,6 @@ static bool parse_model_from_chat(const httplib::Request & req, std::string & mo
     return !model.empty();
 }
 
-static bool authorize_admin(const RouterConfig & cfg, const httplib::Request & req, httplib::Response & res) {
-    if (cfg.router.admin_token.empty()) {
-        return true;
-    }
-
-    const std::string bearer = "Bearer " + cfg.router.admin_token;
-    const auto        auth   = req.get_header_value("Authorization");
-    const auto        token  = req.get_header_value("X-Admin-Token");
-
-    if (auth == bearer || token == cfg.router.admin_token) {
-        return true;
-    }
-
-    res.status = 403;
-    res.set_content("{\"error\":\"forbidden\"}", "application/json");
-    LOG_WRN("Admin endpoint rejected unauthorized request from %s:%d\n", req.remote_addr.c_str(), req.remote_port);
-    return false;
-}
-
 void register_routes(httplib::Server & server, RouterApp & app) {
     server.Get("/v1/models", [&app](const httplib::Request &, httplib::Response & res) { handle_models(app, res); });
 
@@ -86,6 +66,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
         auto model_it = req.matches.begin();
         ++model_it;
         std::string model_name = model_it != req.matches.end() ? model_it->str() : std::string();
+        ++model_it;
+        const std::string endpoint_suffix = model_it != req.matches.end() ? model_it->str() : std::string();
         std::string error;
         if (!app.ensure_running(model_name, error)) {
             LOG_WRN("Model %s unavailable: %s\n", model_name.c_str(), error.c_str());
@@ -95,7 +77,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
         }
         LOG_INF("Proxying %s for model %s\n", req.path.c_str(), model_name.c_str());
         const auto spawn_cfg = app.get_spawn_config(model_name);
-        proxy_request(req, res, app.upstream_for(model_name), app.get_config().router, spawn_cfg.proxy_endpoints);
+        const std::string corrected_path = "/" + endpoint_suffix;
+        proxy_request(req, res, app.upstream_for(model_name), app.get_config().router, spawn_cfg.proxy_endpoints, corrected_path);
     });
 
     server.Post("/v1/chat/completions", [&app](const httplib::Request & req, httplib::Response & res) {
@@ -120,16 +103,6 @@ void register_routes(httplib::Server & server, RouterApp & app) {
         proxy_request(req, res, app.upstream_for(model), app.get_config().router, spawn_cfg.proxy_endpoints);
     });
 
-    server.Post("/admin/reload", [&app](const httplib::Request & req, httplib::Response & res) {
-        if (!authorize_admin(app.get_config(), req, res)) {
-            return;
-        }
-        LOG_INF("Reloading router application: stopping and auto-starting models\n");
-        app.stop_all();
-        app.start_auto_models();
-        res.set_content("{\"status\":\"reloaded\"}", "application/json");
-    });
-
     server.set_error_handler([](const httplib::Request &, httplib::Response & res) {
         res.status = 404;
         res.set_content("{\"error\":\"not found\"}", "application/json");
diff --git a/tools/router/router-process.cpp b/tools/router/router-process.cpp
@@ -146,8 +146,9 @@ ProcessHandle spawn_process(const std::vector<std::string> & args) {
     }
 
     const std::string binary = args[0];
+    const bool        has_path_separator = binary.find('/') != std::string::npos || binary.find('\\') != std::string::npos;
     std::error_code   ec;
-    if (!std::filesystem::exists(binary, ec)) {
+    if (has_path_separator && !std::filesystem::exists(binary, ec)) {
         LOG_ERR("Binary not found: %s\n", binary.c_str());
         return handle;
     }
diff --git a/tools/router/router-proxy.cpp b/tools/router/router-proxy.cpp
diff --git a/tools/router/router-proxy.h b/tools/router/router-proxy.h
diff --git a/tools/router/router.cpp b/tools/router/router.cpp

Original file line number	Diff line number	Diff line change
`@@ -146,8 +146,9 @@ ProcessHandle spawn_process(const std::vector<std::string> & args) {`
`146`	`146`	`}`
`147`	`147`
`148`	`148`	`const std::string binary = args[0];`
	`149`	`+ const bool has_path_separator = binary.find('/') != std::string::npos \|\| binary.find('\\') != std::string::npos;`
`149`	`150`	`std::error_code ec;`
`150`		`- if (!std::filesystem::exists(binary, ec)) {`
	`151`	`+ if (has_path_separator && !std::filesystem::exists(binary, ec)) {`
`151`	`152`	`LOG_ERR("Binary not found: %s\n", binary.c_str());`
`152`	`153`	`return handle;`
`153`	`154`	`}`