Skip to content

Commit 3d14f32

Browse files
llama-router: add multi-engine support with configurable spawn and endpoints
- Introduce SpawnConfig struct: command, proxy_endpoints, health_endpoint - Replace vector<string> default_spawn with full SpawnConfig - Support per-model spawn override (vLLM, TGI, etc. alongside llama.cpp) - Implement prefix-based endpoint filtering (simple startswith, no wildcards) - Health endpoint now configurable per spawn config - Validate spawn commands and proxy endpoints before execution Default config enables /v1/, /health, /slots, /props endpoints. Single router can now manage heterogeneous inference backends
1 parent 1862a17 commit 3d14f32

File tree

11 files changed

+123
-32
lines changed

11 files changed

+123
-32
lines changed

tools/router/router-app.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,19 @@ RouterApp::RouterApp(RouterConfig cfg)
1717

1818
RouterApp::~RouterApp() {
    // Stop every spawned backend process before the router object goes away.
    stop_all();
}
1919

20+
// Pick the model's own spawn settings when any field is set; otherwise fall
// back to the router-wide default spawn config.
SpawnConfig RouterApp::resolve_spawn_config(const ModelConfig & cfg) const {
    if (is_spawn_empty(cfg.spawn)) {
        return config.default_spawn;
    }
    return cfg.spawn;
}
23+
24+
// Thread-safe lookup of the effective spawn config for a model name.
// Unknown models fall back to the router-wide default spawn config.
SpawnConfig RouterApp::get_spawn_config(const std::string & model_name) {
    std::lock_guard<std::mutex> lock(mutex);
    const auto entry = model_lookup.find(model_name);
    if (entry != model_lookup.end()) {
        return resolve_spawn_config(entry->second);
    }
    return config.default_spawn;
}
32+
2033
void RouterApp::start_auto_models() {
2134
for (const auto & model : config.models) {
2235
if (model.state == "auto") {
@@ -78,7 +91,9 @@ bool RouterApp::ensure_running(const std::string & model_name, std::string & err
7891
int port = next_port.fetch_add(1);
7992
model_ports[model_name] = port;
8093

81-
std::vector<std::string> command = cfg.spawn.empty() ? config.default_spawn : cfg.spawn;
94+
const SpawnConfig spawn_cfg = resolve_spawn_config(cfg);
95+
96+
std::vector<std::string> command = spawn_cfg.command;
8297
command.push_back("--model");
8398
command.push_back(expand_user_path(cfg.path));
8499
command.push_back("--port");
@@ -100,7 +115,8 @@ bool RouterApp::ensure_running(const std::string & model_name, std::string & err
100115
last_spawned_model = model_name;
101116
LOG_INF("Spawned %s (group '%s') with %zu args\n", model_name.c_str(), target_group.c_str(), command.size());
102117

103-
if (!wait_for_backend_ready(port, ROUTER_BACKEND_READY_TIMEOUT_MS, &proc_it->second)) {
118+
const std::string health_endpoint = spawn_cfg.health_endpoint.empty() ? "/health" : spawn_cfg.health_endpoint;
119+
if (!wait_for_backend_ready(port, health_endpoint, ROUTER_BACKEND_READY_TIMEOUT_MS, &proc_it->second)) {
104120
error = "backend not ready";
105121
LOG_ERR("Backend for %s did not become ready on port %d within %d ms\n",
106122
model_name.c_str(),

tools/router/router-app.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class RouterApp {
1717
bool ensure_running(const std::string & model_name, std::string & error);
1818
std::string upstream_for(const std::string & model_name);
1919
std::string get_last_spawned_model();
20+
SpawnConfig get_spawn_config(const std::string & model_name);
2021
void stop_all();
2122

2223
const RouterConfig & get_config() const { return config; }
@@ -29,4 +30,6 @@ class RouterApp {
2930
std::unordered_map<std::string, ProcessHandle> processes;
3031
std::unordered_map<std::string, int> model_ports;
3132
std::string last_spawned_model;
33+
34+
SpawnConfig resolve_spawn_config(const ModelConfig & cfg) const;
3235
};

tools/router/router-config.cpp

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,42 @@ static std::string detect_llama_server_binary() {
6060
#endif
6161
}
6262

63-
const std::vector<std::string> & get_default_spawn() {
64-
static const std::vector<std::string> spawn = [] {
65-
std::vector<std::string> default_spawn = {
66-
"llama-server", "--ctx-size", "4096", "--n-gpu-layers", "99",
63+
// Build a SpawnConfig from a JSON object; absent keys leave the
// corresponding field default-initialized (empty).
static SpawnConfig parse_spawn_config(const json & data) {
    SpawnConfig spawn;

    if (const auto it = data.find("command"); it != data.end()) {
        spawn.command = it->get<std::vector<std::string>>();
    }
    if (const auto it = data.find("proxy_endpoints"); it != data.end()) {
        spawn.proxy_endpoints = it->get<std::vector<std::string>>();
    }
    if (const auto it = data.find("health_endpoint"); it != data.end()) {
        spawn.health_endpoint = it->get<std::string>();
    }

    return spawn;
}
77+
78+
// Convert a SpawnConfig into its JSON object form; all three keys are
// always emitted so a round-trip through parse_spawn_config is lossless.
static json serialize_spawn_config(const SpawnConfig & spawn) {
    return json{
        { "command",         spawn.command         },
        { "proxy_endpoints", spawn.proxy_endpoints },
        { "health_endpoint", spawn.health_endpoint },
    };
}
85+
86+
const SpawnConfig & get_default_spawn() {
87+
static const SpawnConfig spawn = [] {
88+
SpawnConfig default_spawn = {
89+
/*command =*/ {"llama-server", "--ctx-size", "4096", "--n-gpu-layers", "99"},
90+
/*proxy_endpoints =*/ {"/v1/", "/health", "/slots", "/props"},
91+
/*health_endpoint =*/ "/health",
6792
};
6893

6994
std::error_code ec;
7095
const std::string detected_path = detect_llama_server_binary();
7196
if (!detected_path.empty() && std::filesystem::exists(detected_path, ec) && !ec) {
7297
LOG_INF("Detected llama-server at %s\n", detected_path.c_str());
73-
default_spawn[0] = detected_path;
98+
default_spawn.command[0] = detected_path;
7499
} else {
75100
LOG_INF("Falling back to llama-server resolved via PATH\n");
76101
}
@@ -135,7 +160,7 @@ static void ensure_parent_directory(const std::string & path) {
135160
void write_config_file(const RouterConfig & cfg, const std::string & path) {
136161
json out;
137162
out["version"] = cfg.version;
138-
out["default_spawn"] = cfg.default_spawn;
163+
out["default_spawn"] = serialize_spawn_config(cfg.default_spawn);
139164
out["router"] = {{"host", cfg.router.host},
140165
{"port", cfg.router.port},
141166
{"base_port", cfg.router.base_port},
@@ -155,8 +180,8 @@ void write_config_file(const RouterConfig & cfg, const std::string & path) {
155180
if (!m.group.empty()) {
156181
obj["group"] = m.group;
157182
}
158-
if (!m.spawn.empty()) {
159-
obj["spawn"] = m.spawn;
183+
if (!is_spawn_empty(m.spawn)) {
184+
obj["spawn"] = serialize_spawn_config(m.spawn);
160185
}
161186
out["models"].push_back(std::move(obj));
162187
}
@@ -204,7 +229,7 @@ RouterConfig load_config(const std::string & path) {
204229
cfg.version = data["version"].get<std::string>();
205230
}
206231
if (data.contains("default_spawn")) {
207-
cfg.default_spawn = data["default_spawn"].get<std::vector<std::string>>();
232+
cfg.default_spawn = parse_spawn_config(data["default_spawn"]);
208233
}
209234
if (data.contains("router")) {
210235
auto r = data["router"];
@@ -223,7 +248,7 @@ RouterConfig load_config(const std::string & path) {
223248
mc.state = m.value("state", "manual");
224249
mc.group = m.value("group", "");
225250
if (m.contains("spawn")) {
226-
mc.spawn = m["spawn"].get<std::vector<std::string>>();
251+
mc.spawn = parse_spawn_config(m["spawn"]);
227252
}
228253
cfg.models.push_back(std::move(mc));
229254
}
@@ -248,11 +273,14 @@ RouterConfig load_config(const std::string & path) {
248273
throw std::runtime_error("model path does not exist: " + path_to_check);
249274
}
250275

251-
if (!model.spawn.empty()) {
252-
const std::string & cmd = model.spawn.front();
253-
if (!cmd.empty() && cmd.find('/') != std::string::npos && !std::filesystem::exists(cmd, ec)) {
254-
throw std::runtime_error("spawn command not executable: " + cmd);
255-
}
276+
const SpawnConfig & spawn = is_spawn_empty(model.spawn) ? cfg.default_spawn : model.spawn;
277+
if (spawn.command.empty()) {
278+
throw std::runtime_error("spawn command missing for model: " + model.name);
279+
}
280+
281+
const std::string & cmd = spawn.command.front();
282+
if (!cmd.empty() && cmd.find('/') != std::string::npos && !std::filesystem::exists(cmd, ec)) {
283+
throw std::runtime_error("spawn command not executable: " + cmd);
256284
}
257285
}
258286

tools/router/router-config.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,22 @@
33
#include <string>
44
#include <vector>
55

6+
// Describes how to launch and talk to one backend inference server.
struct SpawnConfig {
    std::vector<std::string> command;          // argv used to spawn the backend process
    std::vector<std::string> proxy_endpoints;  // request-path prefixes the router forwards
    std::string              health_endpoint;  // readiness-probe path, e.g. "/health"
};

// A spawn config counts as "unset" only when none of its fields carry a value.
inline bool is_spawn_empty(const SpawnConfig & spawn) {
    const bool has_any = !spawn.command.empty()
                      || !spawn.proxy_endpoints.empty()
                      || !spawn.health_endpoint.empty();
    return !has_any;
}
15+
616
struct ModelConfig {
717
std::string name;
818
std::string path;
919
std::string state;
1020
std::string group;
11-
std::vector<std::string> spawn;
21+
SpawnConfig spawn;
1222
};
1323

1424
struct RouterOptions {
@@ -22,14 +32,14 @@ struct RouterOptions {
2232

2333
struct RouterConfig {
2434
std::string version;
25-
std::vector<std::string> default_spawn;
35+
SpawnConfig default_spawn;
2636
RouterOptions router;
2737
std::vector<ModelConfig> models;
2838
};
2939

3040
std::string get_default_config_path();
3141
std::string expand_user_path(const std::string & path);
32-
const std::vector<std::string> & get_default_spawn();
42+
const SpawnConfig & get_default_spawn();
3343
const RouterOptions & get_default_router_options();
3444

3545
RouterConfig load_config(const std::string & path);

tools/router/router-endpoints.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
7474
return;
7575
}
7676
LOG_INF("Proxying %s to last spawned model %s\n", req.path.c_str(), model.c_str());
77-
proxy_request(req, res, app.upstream_for(model), app.get_config().router);
77+
const auto spawn_cfg = app.get_spawn_config(model);
78+
proxy_request(req, res, app.upstream_for(model), app.get_config().router, spawn_cfg.proxy_endpoints);
7879
};
7980

8081
server.Get("/props", proxy_last_spawned);
@@ -93,7 +94,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
9394
return;
9495
}
9596
LOG_INF("Proxying %s for model %s\n", req.path.c_str(), model_name.c_str());
96-
proxy_request(req, res, app.upstream_for(model_name), app.get_config().router);
97+
const auto spawn_cfg = app.get_spawn_config(model_name);
98+
proxy_request(req, res, app.upstream_for(model_name), app.get_config().router, spawn_cfg.proxy_endpoints);
9799
});
98100

99101
server.Post("/v1/chat/completions", [&app](const httplib::Request & req, httplib::Response & res) {
@@ -114,7 +116,8 @@ void register_routes(httplib::Server & server, RouterApp & app) {
114116
}
115117

116118
LOG_INF("Proxying chat completion for model %s\n", model.c_str());
117-
proxy_request(req, res, app.upstream_for(model), app.get_config().router);
119+
const auto spawn_cfg = app.get_spawn_config(model);
120+
proxy_request(req, res, app.upstream_for(model), app.get_config().router, spawn_cfg.proxy_endpoints);
118121
});
119122

120123
server.Post("/admin/reload", [&app](const httplib::Request & req, httplib::Response & res) {

tools/router/router-process.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,16 +320,21 @@ ProcessHandle spawn_process(const std::vector<std::string> & args) {
320320
return handle;
321321
}
322322

323-
bool wait_for_backend_ready(int port, int timeout_ms, const ProcessHandle * process) {
323+
bool wait_for_backend_ready(int port, const std::string & health_endpoint, int timeout_ms, const ProcessHandle * process) {
324324
httplib::Client client("127.0.0.1:" + std::to_string(port));
325325
const auto start = std::chrono::steady_clock::now();
326326
auto next_log_ms = 0;
327327

328-
LOG_INF("Waiting up to %d ms for backend readiness on port %d\n", timeout_ms, port);
328+
const std::string endpoint = health_endpoint.empty() ? "/health" : health_endpoint;
329+
330+
LOG_INF("Waiting up to %d ms for backend readiness on port %d (endpoint %s)\n",
331+
timeout_ms,
332+
port,
333+
endpoint.c_str());
329334

330335
while (true) {
331336
try {
332-
auto res = client.Get("/health");
337+
auto res = client.Get(endpoint.c_str());
333338
if (res && res->status == 200) {
334339
LOG_INF("Backend on port %d reports ready\n", port);
335340
return true;

tools/router/router-process.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,7 @@ void close_process(ProcessHandle & handle);
4242
void terminate_process(ProcessHandle & handle);
4343
bool wait_for_process_exit(const ProcessHandle & handle, int timeout_ms);
4444
ProcessHandle spawn_process(const std::vector<std::string> & args);
45-
bool wait_for_backend_ready(int port, int timeout_ms, const ProcessHandle * process = nullptr);
45+
bool wait_for_backend_ready(int port,
46+
const std::string & health_endpoint,
47+
int timeout_ms,
48+
const ProcessHandle * process = nullptr);

tools/router/router-proxy.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,27 @@ void copy_response_headers(const httplib::Headers & from, httplib::Response & to
1919
to.set_header(h.first, h.second);
2020
}
2121
}
22+
23+
// Returns true when `path` starts with at least one of the configured
// endpoint prefixes. An empty pattern list means "proxy everything".
bool matches_any_endpoint(const std::string & path, const std::vector<std::string> & patterns) {
    if (patterns.empty()) {
        return true;
    }

    for (const std::string & prefix : patterns) {
        // rfind(prefix, 0) == 0 is the classic starts-with idiom.
        const bool is_prefix = path.rfind(prefix, 0) == 0;
        if (is_prefix) {
            return true;
        }
    }

    return false;
}
2236
} // namespace
2337

2438
bool proxy_request(const httplib::Request & req,
2539
httplib::Response & res,
2640
const std::string & upstream_base,
27-
const RouterOptions & opts) {
41+
const RouterOptions & opts,
42+
const std::vector<std::string> & proxy_endpoints) {
2843
if (upstream_base.empty()) {
2944
res.status = 502;
3045
res.set_content("{\"error\":\"missing upstream\"}", "application/json");
@@ -41,6 +56,13 @@ bool proxy_request(const httplib::Request & req,
4156

4257
const std::string path = !req.target.empty() ? req.target : req.path;
4358

59+
if (!matches_any_endpoint(path, proxy_endpoints)) {
60+
LOG_WRN("Request %s not proxied because it does not match configured endpoints\n", path.c_str());
61+
res.status = 404;
62+
res.set_content("{\"error\":\"endpoint not proxied\"}", "application/json");
63+
return false;
64+
}
65+
4466
std::string content_type = req.get_header_value("Content-Type", "application/json");
4567

4668
const auto accept_header = req.get_header_value("Accept");

tools/router/router-proxy.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@
99
bool proxy_request(const httplib::Request & req,
1010
httplib::Response & res,
1111
const std::string & upstream_base,
12-
const RouterOptions & opts);
12+
const RouterOptions & opts,
13+
const std::vector<std::string> & proxy_endpoints);

tools/router/router-scanner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ std::vector<ModelConfig> scan_default_models() {
142142
mc.state = "auto";
143143
if (auto it_mmproj = mmproj_map.find(full_path); it_mmproj != mmproj_map.end()) {
144144
mc.spawn = get_default_spawn();
145-
mc.spawn.push_back("--mmproj");
146-
mc.spawn.push_back(it_mmproj->second);
145+
mc.spawn.command.push_back("--mmproj");
146+
mc.spawn.command.push_back(it_mmproj->second);
147147
}
148148

149149
models.push_back(std::move(mc));

0 commit comments

Comments
 (0)