Skip to content

Commit fd289ef

Browse files
server : router config POC (INI-based per-model settings)
1 parent f7170cc commit fd289ef

File tree

7 files changed

+539
-31
lines changed

7 files changed

+539
-31
lines changed

common/arg.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,26 @@ static std::string read_file(const std::string & fname) {
6363
return content;
6464
}
6565

66+
static const std::vector<common_arg> & get_common_arg_defs() {
67+
static const std::vector<common_arg> options = [] {
68+
common_params params;
69+
auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_SERVER, nullptr);
70+
return ctx.options;
71+
}();
72+
return options;
73+
}
74+
75+
std::string common_arg_get_env_name(const std::string & flag) {
76+
for (const auto & arg : get_common_arg_defs()) {
77+
for (const auto & arg_flag : arg.args) {
78+
if (arg_flag == flag) {
79+
return arg.env ? arg.env : "";
80+
}
81+
}
82+
}
83+
return "";
84+
}
85+
6686
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
6787
this->examples = examples;
6888
return *this;

common/arg.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
7979
// function to be used by test-arg-parser
8080
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
8181

82+
// Get environment variable name for a CLI flag (e.g. "--ctx-size" -> "LLAMA_ARG_CTX_SIZE")
83+
// Returns empty string if flag not found
84+
std::string common_arg_get_env_name(const std::string & flag);
85+
8286
struct common_remote_params {
8387
std::vector<std::string> headers;
8488
long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout

tools/server/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ set(TARGET_SRCS
1212
server-http.h
1313
server-models.cpp
1414
server-models.h
15+
server-config.cpp
16+
server-config.h
1517
server-task.cpp
1618
server-task.h
1719
server-queue.cpp

tools/server/server-config.cpp

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
#include "server-config.h"
2+
3+
#include "peg-parser.h"
4+
#include "arg.h"
5+
6+
#include <algorithm>
7+
#include <cctype>
8+
#include <fstream>
9+
#include <functional>
10+
#include <optional>
11+
#include <set>
12+
13+
namespace {
14+
15+
// True when `arg` looks like a command-line option, i.e. it is non-empty and
// its first character is '-'.
bool is_option(const std::string & arg) {
    if (arg.empty()) {
        return false;
    }
    return arg.front() == '-';
}
18+
19+
// Returns a copy of `value` with leading and trailing whitespace (as classified
// by std::isspace) removed. A string made only of whitespace yields "".
std::string trim(const std::string & value) {
    // Take the character as unsigned char so std::isspace never sees a negative value.
    const auto not_space = [](unsigned char ch) { return std::isspace(ch) == 0; };

    // First character to keep (or end() if there is none).
    const auto first = std::find_if(value.begin(), value.end(), not_space);

    // Scan backwards, but never past `first`; base() is one past the last kept character.
    const auto last = std::find_if(value.rbegin(), std::string::const_reverse_iterator(first), not_space).base();

    return std::string(first, last);
}
31+
32+
bool is_implicit_value(const std::vector<std::string> & args, size_t index) {
33+
return index + 1 < args.size() && !is_option(args[index + 1]);
34+
}
35+
36+
// Expresses `path` relative to the directory `base`, using '/' as separator.
//
// Both arguments are first made absolute. The input `path` is returned
// unchanged when:
//   - `path` is empty,
//   - any filesystem call reports an error, or
//   - no relative form exists (std::filesystem::relative yields an empty path
//     without setting an error, e.g. when the two paths have different root
//     names). Returning "" in that case would silently lose the path.
std::string relativize(const std::string & path, const std::string & base) {
    if (path.empty()) {
        return path;
    }

    namespace fs = std::filesystem;

    std::error_code ec;

    const fs::path abs_path = fs::absolute(path, ec);
    if (ec) {
        return path;
    }

    const fs::path abs_base = fs::absolute(base, ec);
    if (ec) {
        return path;
    }

    const fs::path rel = fs::relative(abs_path, abs_base, ec);
    if (ec || rel.empty()) {
        return path;
    }

    return rel.generic_string();
}
58+
59+
} // namespace
60+
61+
// Creates a config manager bound to `models_dir`.
// When the directory is non-empty the config file location becomes
// "<models_dir>/config.ini"; when it is empty `path` is not assigned here.
server_config_manager::server_config_manager(const std::string & models_dir)
    : models_dir(models_dir) {
    if (models_dir.empty()) {
        return;
    }

    const std::filesystem::path config_file = std::filesystem::path(models_dir) / "config.ini";
    path = config_file.string();
}
67+
68+
bool server_config_manager::enabled() const {
69+
return !models_dir.empty();
70+
}
71+
72+
void server_config_manager::ensure_loaded() {
73+
if (!enabled()) {
74+
return;
75+
}
76+
77+
namespace fs = std::filesystem;
78+
79+
std::lock_guard<std::mutex> lock(mutex);
80+
81+
if (!fs::exists(path)) {
82+
data.clear();
83+
last_write_time = {};
84+
return;
85+
}
86+
87+
const auto current_write_time = fs::last_write_time(path);
88+
if (last_write_time == current_write_time) {
89+
return;
90+
}
91+
92+
std::ifstream file(path);
93+
if (!file.good()) {
94+
throw std::runtime_error("failed to open server config file: " + path);
95+
}
96+
97+
std::string contents((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
98+
99+
static const auto & parser = *new common_peg_arena(build_peg_parser([](common_peg_parser_builder & p) {
100+
const auto ws = p.space();
101+
const auto new_line = p.choice({p.literal("\r\n"), p.literal("\n"), p.literal("\r")});
102+
103+
const auto section_name = p.tag("section-name", p.until("]"));
104+
const auto section_line = p.zero_or_more(ws) + "[" + section_name + "]" + p.optional(p.until_one_of({"\r", "\n"}));
105+
106+
const auto key = p.tag("key", p.until("="));
107+
const auto value = p.tag("value", p.until_one_of({"\r", "\n"}));
108+
const auto key_value_line = p.zero_or_more(ws) + key + p.zero_or_more(ws) + "=" + p.zero_or_more(ws) + p.optional(value);
109+
110+
const auto comment = p.choice({p.literal(";"), p.literal("#")}) + p.optional(p.until_one_of({"\r", "\n"}));
111+
const auto comment_line = p.zero_or_more(ws) + comment;
112+
113+
const auto blank_line = p.zero_or_more(ws) + new_line;
114+
115+
const auto line = p.choice({
116+
section_line << p.optional(new_line),
117+
key_value_line << p.optional(new_line),
118+
comment_line << p.optional(new_line),
119+
blank_line,
120+
});
121+
122+
return p.rule("ini", p.zero_or_more(line) << p.optional(p.zero_or_more(ws)) << p.end());
123+
}));
124+
125+
common_peg_parse_context ctx(contents);
126+
const auto result = parser.parse(ctx);
127+
if (!result.success() || result.end != contents.size()) {
128+
throw std::runtime_error("failed to parse server config file: " + path);
129+
}
130+
131+
std::map<std::string, std::map<std::string, std::string>> parsed;
132+
std::string current_section;
133+
std::optional<std::string> pending_key;
134+
135+
const auto flush_pending = [&](const std::string & value) {
136+
if (current_section.empty() || !pending_key) {
137+
return;
138+
}
139+
140+
const auto & key = *pending_key;
141+
if (key.rfind("LLAMA_ARG_", 0) != 0) {
142+
return;
143+
}
144+
145+
parsed[current_section][key] = value;
146+
};
147+
148+
ctx.ast.visit(result, [&](const common_peg_ast_node & node) {
149+
if (node.tag == "section-name") {
150+
if (pending_key) {
151+
flush_pending("");
152+
pending_key.reset();
153+
}
154+
155+
current_section = trim(std::string(node.text));
156+
return;
157+
}
158+
159+
if (node.tag == "key") {
160+
if (pending_key) {
161+
flush_pending("");
162+
}
163+
164+
pending_key = trim(std::string(node.text));
165+
return;
166+
}
167+
168+
if (node.tag == "value") {
169+
if (!pending_key) {
170+
return;
171+
}
172+
173+
flush_pending(trim(std::string(node.text)));
174+
pending_key.reset();
175+
return;
176+
}
177+
});
178+
179+
if (pending_key) {
180+
flush_pending("");
181+
}
182+
183+
data = std::move(parsed);
184+
last_write_time = current_write_time;
185+
}
186+
187+
// write_locked expects the caller to hold `mutex`.
188+
void server_config_manager::write_locked() {
189+
if (!enabled()) {
190+
return;
191+
}
192+
193+
namespace fs = std::filesystem;
194+
195+
if (!path.empty()) {
196+
auto parent = fs::path(path).parent_path();
197+
if (!parent.empty()) {
198+
fs::create_directories(parent);
199+
}
200+
}
201+
202+
std::ofstream file(path);
203+
file << "LLAMA_CONFIG_VERSION=1\n\n";
204+
205+
bool first_section = true;
206+
for (const auto & [section, args] : data) {
207+
if (!first_section) {
208+
file << "\n";
209+
}
210+
first_section = false;
211+
212+
file << "[" << section << "]\n";
213+
for (const auto & [key, value] : args) {
214+
file << key << "=";
215+
if (!value.empty()) {
216+
file << value;
217+
}
218+
file << "\n";
219+
}
220+
}
221+
222+
file.flush();
223+
last_write_time = fs::last_write_time(path);
224+
}
225+
226+
// True when `arg` is one of the flags that are handled by the router itself and are
// therefore skipped when router arguments are turned into per-model settings.
// The match is exact: only the spellings listed below are recognised.
bool is_router_control_arg(const std::string & arg) {
    static const std::set<std::string> router_only_flags = {
        "--alias",              // set per-child in server_models::load
        "--models-dir",         // router-side discovery only
        "--models-max",         // router capacity control
        "--no-models-autoload", // router autoload policy
        "--port",               // router port differs from child port
        "-m", "--model",        // model path supplied per-child
        "-hf", "--hf-file"      // model source supplied per-child
    };

    const auto hit = router_only_flags.find(arg);
    return hit != router_only_flags.end();
}
238+
239+
void server_config_manager::sync(const std::vector<server_local_model> & models, const std::vector<std::string> & base_args) {
240+
if (!enabled()) {
241+
return;
242+
}
243+
244+
ensure_loaded();
245+
246+
std::map<std::string, std::string> router_args;
247+
248+
for (size_t i = 1; i < base_args.size(); ++i) { // skip argv[0]
249+
const auto & arg = base_args[i];
250+
if (!is_option(arg)) {
251+
continue;
252+
}
253+
254+
if (is_router_control_arg(arg)) {
255+
if (is_implicit_value(base_args, i)) {
256+
++i;
257+
}
258+
continue;
259+
}
260+
261+
std::string value = "true";
262+
if (is_implicit_value(base_args, i)) {
263+
value = base_args[i + 1];
264+
++i;
265+
}
266+
267+
const auto env_name = common_arg_get_env_name(arg);
268+
if (!env_name.empty()) {
269+
router_args[env_name] = value;
270+
}
271+
}
272+
273+
std::lock_guard<std::mutex> lock(mutex);
274+
275+
bool changed = !std::filesystem::exists(path);
276+
277+
const auto model_key = common_arg_get_env_name("--model");
278+
const auto model_alias = common_arg_get_env_name("-m");
279+
const auto mmproj_key = common_arg_get_env_name("--mmproj");
280+
281+
const std::vector<std::string> model_keys = {
282+
model_key,
283+
model_alias,
284+
"LLAMA_ARG_MODEL",
285+
};
286+
287+
const std::vector<std::string> mmproj_keys = {
288+
mmproj_key,
289+
"LLAMA_ARG_MMPROJ",
290+
};
291+
292+
for (const auto & model : models) {
293+
auto & section = data[model.name];
294+
295+
const auto has_any_key = [](const auto & section_map, const std::vector<std::string> & keys) {
296+
for (const auto & key : keys) {
297+
if (!key.empty() && section_map.find(key) != section_map.end()) {
298+
return true;
299+
}
300+
}
301+
return false;
302+
};
303+
304+
if (!model_key.empty() && !has_any_key(section, model_keys)) {
305+
section[model_key] = relativize(model.path, models_dir);
306+
changed = true;
307+
}
308+
309+
if (!model.path_mmproj.empty() && !mmproj_key.empty() && !has_any_key(section, mmproj_keys)) {
310+
section[mmproj_key] = relativize(model.path_mmproj, models_dir);
311+
changed = true;
312+
}
313+
314+
for (const auto & router_arg : router_args) {
315+
if (section.find(router_arg.first) == section.end()) {
316+
section[router_arg.first] = router_arg.second;
317+
changed = true;
318+
}
319+
}
320+
}
321+
322+
if (changed) {
323+
write_locked();
324+
}
325+
}
326+
327+
std::map<std::string, std::string> server_config_manager::env_for(const std::string & name) {
328+
if (!enabled()) {
329+
return {};
330+
}
331+
332+
ensure_loaded();
333+
334+
std::lock_guard<std::mutex> lock(mutex);
335+
336+
auto it = data.find(name);
337+
return it != data.end() ? it->second : std::map<std::string, std::string>{};
338+
}
339+

0 commit comments

Comments
 (0)