Skip to content

Commit dda9dc6

Browse files
llama-router: auto-rescan, admin endpoints, and fixes
Auto-rescan models on startup:
- Scan cache directory and add new .gguf files as 'auto' models
- Remove 'auto' models no longer present in cache
- Never touch 'manual' models (user-managed configuration)
- Preserve custom spawn/group settings for existing models
- New /admin/rescan endpoint for on-demand rescanning

Separate admin endpoints:
- Extract /admin routes to router-admin.cpp/h
- Clean separation: router-endpoints.cpp = public API only
- Add RouterApp::update_config() for live config updates
- Support both Bearer token and X-Admin-Token header auth

Fixes:
- Fix /model/(health|props|slots) path rewriting for backends
- Thread-safe streaming: eliminate parent scope captures
- Robust JSON parsing for 'stream' field detection
- Simplified signal handlers (remove redundant stop_all)
- Initialize logger before any LOG_* calls
1 parent d2123eb commit dda9dc6

File tree

12 files changed

+226
-58
lines changed

12 files changed

+226
-58
lines changed

tools/router/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_executable(${TARGET}
44
router.cpp
55
router-app.cpp
66
router-config.cpp
7+
router-admin.cpp
78
router-endpoints.cpp
89
logging.cpp
910
router-process.cpp

tools/router/router-admin.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#include "router-admin.h"
2+
3+
#include "log.h"
4+
#include "router-config.h"
5+
6+
#include <nlohmann/json.hpp>
7+
8+
using json = nlohmann::json;
9+
10+
// Compares two strings without stopping at the first differing byte, so the
// time spent does not reveal how much of a guessed secret was correct.
// A length mismatch is folded into the result instead of returning early;
// only the input lengths can influence the running time.
static bool constant_time_equals(const std::string & lhs, const std::string & rhs) {
    const size_t common = lhs.size() < rhs.size() ? lhs.size() : rhs.size();
    unsigned char diff = lhs.size() == rhs.size() ? 0 : 1;
    for (size_t i = 0; i < common; ++i) {
        diff |= static_cast<unsigned char>(lhs[i] ^ rhs[i]);
    }
    return diff == 0;
}

// Decides whether `req` may use an admin endpoint.
//
// Returns true when no admin token is configured, or when the request carries
// the token either as "Authorization: Bearer <token>" or as "X-Admin-Token: <token>".
// Otherwise fills `res` with a 403 JSON error, logs the rejection, and returns false.
static bool authorize_admin(const RouterConfig & cfg, const httplib::Request & req, httplib::Response & res) {
    const std::string & expected = cfg.router.admin_token;
    if (expected.empty()) {
        // No token configured: admin endpoints are open.
        return true;
    }

    // Evaluate both credentials unconditionally (no short-circuit) so the
    // work done does not depend on which header, if any, matched.
    const bool bearer_ok = constant_time_equals(req.get_header_value("Authorization"), "Bearer " + expected);
    const bool header_ok = constant_time_equals(req.get_header_value("X-Admin-Token"), expected);
    if (bearer_ok || header_ok) {
        return true;
    }

    res.status = 403;
    res.set_content("{\"error\":\"forbidden\"}", "application/json");
    LOG_WRN("Admin endpoint rejected unauthorized request from %s:%d\n", req.remote_addr.c_str(), req.remote_port);
    return false;
}
28+
29+
// Installs the administrative HTTP routes on `server`:
//   POST /admin/reload - calls app.stop_all() followed by app.start_auto_models()
//   GET  /admin/rescan - runs rescan_auto_models() on the current config, hands the
//                        result to app.update_config(), and writes it to `config_path`
//                        when that path is non-empty and models were added or removed
// Each handler calls authorize_admin() first and does nothing further when it
// returns false (authorize_admin has already filled in the response).
void register_admin_routes(httplib::Server & server, RouterApp & app, const std::string & config_path) {
    const auto handle_reload = [&app](const httplib::Request & request, httplib::Response & response) {
        if (authorize_admin(app.get_config(), request, response)) {
            LOG_INF("Reloading router application: stopping and auto-starting models\n");
            app.stop_all();
            app.start_auto_models();
            response.set_content("{\"status\":\"reloaded\"}", "application/json");
        }
    };

    // config_path is captured by value: the handler is stored by the server and
    // is invoked after this function has returned.
    const auto handle_rescan = [&app, config_path](const httplib::Request & request, httplib::Response & response) {
        if (!authorize_admin(app.get_config(), request, response)) {
            return;
        }

        const auto outcome = rescan_auto_models(app.get_config());
        LOG_INF("Admin rescan requested, found %zu new models (removed %zu)\n",
                outcome.added,
                outcome.removed);
        app.update_config(outcome.config);

        // Only touch the file on disk when the model list actually changed.
        const bool models_changed = outcome.added > 0 || outcome.removed > 0;
        if (models_changed && !config_path.empty()) {
            LOG_INF("Persisting updated configuration to %s\n", config_path.c_str());
            write_config_file(app.get_config(), config_path);
        }

        const json payload = {
            { "status",     "rescanned"     },
            { "new_models", outcome.added   },
            { "removed",    outcome.removed },
        };
        response.set_content(payload.dump(), "application/json");
    };

    server.Post("/admin/reload", handle_reload);
    server.Get("/admin/rescan", handle_rescan);
}

tools/router/router-admin.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#pragma once
2+
3+
#include "router-app.h"
4+
5+
#include <cpp-httplib/httplib.h>
6+
7+
// Registers the admin routes (POST /admin/reload and GET /admin/rescan) on `server`.
// `config_path` is the file the rescan handler writes the configuration to after a
// rescan that added or removed models; when it is empty (the default) nothing is written.
void register_admin_routes(httplib::Server & server, RouterApp & app, const std::string & config_path = std::string());

tools/router/router-app.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,25 @@ std::string RouterApp::get_last_spawned_model() {
148148
return last_spawned_model;
149149
}
150150

151+
// Swaps in a new configuration while holding the app mutex, then brings the
// state derived from it (model lookup, last spawned model, port counter) back
// in line with the new contents.
void RouterApp::update_config(RouterConfig cfg) {
    std::lock_guard<std::mutex> guard(mutex);

    config = std::move(cfg);

    // Rebuild the name -> model lookup from scratch.
    model_lookup.clear();
    for (auto it = config.models.cbegin(); it != config.models.cend(); ++it) {
        model_lookup.emplace(it->name, *it);
    }

    // Forget the last spawned model if the new configuration no longer lists it.
    const bool last_still_known = model_lookup.count(last_spawned_model) != 0;
    if (!last_still_known) {
        last_spawned_model.clear();
    }

    // The port counter is only ever raised here, never lowered.
    const int base_port = config.router.base_port;
    if (next_port.load() < base_port) {
        next_port.store(base_port);
    }
}
169+
151170
void RouterApp::stop_all() {
152171
std::lock_guard<std::mutex> lock(mutex);
153172
for (auto & kv : processes) {

tools/router/router-app.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class RouterApp {
1919
std::string get_last_spawned_model();
2020
SpawnConfig get_spawn_config(const std::string & model_name);
2121
void stop_all();
22+
// Replaces the active configuration with `cfg` while holding the app mutex, rebuilds
// the model lookup from it, clears the last spawned model if it is no longer configured,
// and raises the next port to the configured base port when that is higher.
void update_config(RouterConfig cfg);
2223

2324
const RouterConfig & get_config() const { return config; }
2425

tools/router/router-config.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <fstream>
1212
#include <stdexcept>
1313
#include <vector>
14+
#include <unordered_map>
15+
#include <unordered_set>
1416

1517
#if defined(_WIN32)
1618
# define WIN32_LEAN_AND_MEAN
@@ -208,6 +210,62 @@ RouterConfig generate_default_config(const std::string & path) {
208210
return cfg;
209211
}
210212

213+
// Reconciles a configuration with the models currently found by scan_default_models().
//
// Works on a copy of `existing` and returns it in RescanResult::config:
//   - a scanned model whose expanded path is not yet configured is appended
//     (its state defaults to "auto" when the scan left it empty) and counted in `added`;
//   - a scanned model whose path is already configured leaves the configured
//     entry untouched, whatever its state;
//   - a configured model whose state is not "manual" and whose expanded path was
//     not seen by the scan is dropped and counted in `removed`;
//   - "manual" models are always kept.
//
// NOTE(review): removal applies to every state other than "manual", not only to
// "auto" - confirm that no other state values are expected in configs.
RescanResult rescan_auto_models(const RouterConfig & existing) {
    RescanResult result;
    result.config = existing;

    RouterConfig & merged = result.config;

    // Paths that already have an entry in `merged`. A set is enough: the entry
    // itself is never consulted, only whether the path is known. (Previously an
    // index map was kept here and used to subscript `existing.models`, which read
    // out of bounds when the scan reported the same path twice.)
    std::unordered_set<std::string> known_paths;
    known_paths.reserve(existing.models.size());
    for (const auto & model : existing.models) {
        known_paths.insert(expand_user_path(model.path));
    }

    auto scanned = scan_default_models();
    std::unordered_set<std::string> scanned_paths;
    scanned_paths.reserve(scanned.size());
    for (auto & scanned_model : scanned) {
        auto expanded = expand_user_path(scanned_model.path);
        scanned_paths.insert(expanded);

        // insert() reports whether the path was new; an already-known path means
        // the model is configured (or was added earlier in this loop), so skip it.
        if (!known_paths.insert(std::move(expanded)).second) {
            continue;
        }

        if (scanned_model.state.empty()) {
            scanned_model.state = "auto";
        }
        merged.models.push_back(std::move(scanned_model));
        ++result.added;
    }

    // Drop non-manual models that the scan no longer reports. Kept models are
    // moved rather than copied; `merged.models` is replaced wholesale afterwards.
    std::vector<ModelConfig> kept;
    kept.reserve(merged.models.size());
    for (auto & model : merged.models) {
        const bool keep = model.state == "manual" || scanned_paths.count(expand_user_path(model.path)) > 0;
        if (keep) {
            kept.push_back(std::move(model));
            continue;
        }

        ++result.removed;
        LOG_INF("Removing auto model (no longer in cache): %s\n", model.name.c_str());
    }
    merged.models = std::move(kept);

    return result;
}
268+
211269
RouterConfig load_config(const std::string & path) {
212270
RouterConfig cfg;
213271
cfg.router = get_default_router_options();
@@ -255,6 +313,10 @@ RouterConfig load_config(const std::string & path) {
255313
}
256314
LOG_INF("Config parsed: %zu models, router port %d, base port %d\n", cfg.models.size(), cfg.router.port, cfg.router.base_port);
257315

316+
const auto rescan_result = rescan_auto_models(cfg);
317+
cfg = rescan_result.config;
318+
LOG_INF("Rescanned models, found %zu new auto models (removed %zu)\n", rescan_result.added, rescan_result.removed);
319+
258320
const auto validate_port = [&](int port, const std::string & name) {
259321
if (port <= 0 || port > 65535) {
260322
throw std::runtime_error("invalid " + name + " port in config: " + std::to_string(port));
@@ -284,6 +346,11 @@ RouterConfig load_config(const std::string & path) {
284346
}
285347
}
286348

349+
if (rescan_result.added > 0 || rescan_result.removed > 0) {
350+
LOG_INF("Persisting updated configuration after rescan (added %zu, removed %zu)\n", rescan_result.added, rescan_result.removed);
351+
write_config_file(cfg, path);
352+
}
353+
287354
return cfg;
288355
}
289356

tools/router/router-config.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ struct RouterConfig {
3737
std::vector<ModelConfig> models;
3838
};
3939

40+
struct RescanResult {
41+
RouterConfig config;
42+
size_t added = 0;
43+
size_t removed = 0;
44+
};
45+
4046
std::string get_default_config_path();
4147
std::string expand_user_path(const std::string & path);
4248
const SpawnConfig & get_default_spawn();
@@ -45,5 +51,6 @@ const RouterOptions & get_default_router_options();
4551
RouterConfig load_config(const std::string & path);
4652
RouterConfig generate_default_config(const std::string & path);
4753
void write_config_file(const RouterConfig & cfg, const std::string & path);
54+
// Returns a copy of `existing` reconciled against scan_default_models(): scanned
// models whose expanded path is not already configured are appended, and models
// whose state is not "manual" and whose path was not scanned are dropped.
// `added` / `removed` in the result count those two kinds of change.
RescanResult rescan_auto_models(const RouterConfig & existing);
4855

4956
std::string get_model_group(const ModelConfig & cfg);

tools/router/router-endpoints.cpp

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
#include "log.h"
44
#include "router-app.h"
5-
#include "router-config.h"
65
#include "router-proxy.h"
76

87
#include <nlohmann/json.hpp>
@@ -35,25 +34,6 @@ static bool parse_model_from_chat(const httplib::Request & req, std::string & mo
3534
return !model.empty();
3635
}
3736

38-
static bool authorize_admin(const RouterConfig & cfg, const httplib::Request & req, httplib::Response & res) {
39-
if (cfg.router.admin_token.empty()) {
40-
return true;
41-
}
42-
43-
const std::string bearer = "Bearer " + cfg.router.admin_token;
44-
const auto auth = req.get_header_value("Authorization");
45-
const auto token = req.get_header_value("X-Admin-Token");
46-
47-
if (auth == bearer || token == cfg.router.admin_token) {
48-
return true;
49-
}
50-
51-
res.status = 403;
52-
res.set_content("{\"error\":\"forbidden\"}", "application/json");
53-
LOG_WRN("Admin endpoint rejected unauthorized request from %s:%d\n", req.remote_addr.c_str(), req.remote_port);
54-
return false;
55-
}
56-
5737
void register_routes(httplib::Server & server, RouterApp & app) {
5838
server.Get("/v1/models", [&app](const httplib::Request &, httplib::Response & res) { handle_models(app, res); });
5939

@@ -86,6 +66,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
8666
auto model_it = req.matches.begin();
8767
++model_it;
8868
std::string model_name = model_it != req.matches.end() ? model_it->str() : std::string();
69+
++model_it;
70+
const std::string endpoint_suffix = model_it != req.matches.end() ? model_it->str() : std::string();
8971
std::string error;
9072
if (!app.ensure_running(model_name, error)) {
9173
LOG_WRN("Model %s unavailable: %s\n", model_name.c_str(), error.c_str());
@@ -95,7 +77,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
9577
}
9678
LOG_INF("Proxying %s for model %s\n", req.path.c_str(), model_name.c_str());
9779
const auto spawn_cfg = app.get_spawn_config(model_name);
98-
proxy_request(req, res, app.upstream_for(model_name), app.get_config().router, spawn_cfg.proxy_endpoints);
80+
const std::string corrected_path = "/" + endpoint_suffix;
81+
proxy_request(req, res, app.upstream_for(model_name), app.get_config().router, spawn_cfg.proxy_endpoints, corrected_path);
9982
});
10083

10184
server.Post("/v1/chat/completions", [&app](const httplib::Request & req, httplib::Response & res) {
@@ -120,16 +103,6 @@ void register_routes(httplib::Server & server, RouterApp & app) {
120103
proxy_request(req, res, app.upstream_for(model), app.get_config().router, spawn_cfg.proxy_endpoints);
121104
});
122105

123-
server.Post("/admin/reload", [&app](const httplib::Request & req, httplib::Response & res) {
124-
if (!authorize_admin(app.get_config(), req, res)) {
125-
return;
126-
}
127-
LOG_INF("Reloading router application: stopping and auto-starting models\n");
128-
app.stop_all();
129-
app.start_auto_models();
130-
res.set_content("{\"status\":\"reloaded\"}", "application/json");
131-
});
132-
133106
server.set_error_handler([](const httplib::Request &, httplib::Response & res) {
134107
res.status = 404;
135108
res.set_content("{\"error\":\"not found\"}", "application/json");

tools/router/router-process.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,9 @@ ProcessHandle spawn_process(const std::vector<std::string> & args) {
146146
}
147147

148148
const std::string binary = args[0];
149+
const bool has_path_separator = binary.find('/') != std::string::npos || binary.find('\\') != std::string::npos;
149150
std::error_code ec;
150-
if (!std::filesystem::exists(binary, ec)) {
151+
if (has_path_separator && !std::filesystem::exists(binary, ec)) {
151152
LOG_ERR("Binary not found: %s\n", binary.c_str());
152153
return handle;
153154
}

0 commit comments

Comments
 (0)