Skip to content

Commit e03668a

Browse files
server: fix router model discovery and child process spawning
- Sanitize model names: replace / and \ with _ for display
- Recursive directory scan with relative path storage
- Convert relative paths to absolute when spawning children
- Filter router control args from child processes
- Refresh args after port assignment for correct port value
- Fallback preset lookup for compatibility
- Fix missing argv[0]: store server binary path before base_args parsing
1 parent c0ddbbf commit e03668a

File tree

2 files changed

+100
-48
lines changed

2 files changed

+100
-48
lines changed

tools/server/server-models.cpp

Lines changed: 97 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
88
#include <sheredom/subprocess.h>
99

10+
#include <cstdio>
1011
#include <functional>
1112
#include <algorithm>
1213
#include <thread>
@@ -77,57 +78,87 @@ static std::filesystem::path get_server_exec_path() {
7778

7879
struct local_model {
7980
std::string name;
81+
std::string display_name;
8082
std::string path;
8183
std::string path_mmproj;
8284
};
8385

86+
static std::string sanitize_model_name(const std::string & name) {
87+
std::string sanitized = name;
88+
string_replace_all(sanitized, "/", "_");
89+
string_replace_all(sanitized, "\\", "_");
90+
return sanitized;
91+
}
92+
8493
static std::vector<local_model> list_local_models(const std::string & dir) {
8594
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
8695
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
8796
}
8897

8998
std::vector<local_model> models;
90-
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
91-
auto files = fs_list(subdir_path, false);
92-
common_file_info model_file;
93-
common_file_info first_shard_file;
94-
common_file_info mmproj_file;
95-
for (const auto & file : files) {
96-
if (string_ends_with(file.name, ".gguf")) {
97-
if (file.name.find("mmproj") != std::string::npos) {
98-
mmproj_file = file;
99-
} else if (file.name.find("-00001-of-") != std::string::npos) {
100-
first_shard_file = file;
101-
} else {
102-
model_file = file;
99+
std::function<void(const std::string &, const std::string &)> scan_subdir =
100+
[&](const std::string & subdir_path, const std::string & name) {
101+
auto files = fs_list(subdir_path, true); // Need directories for recursion
102+
common_file_info model_file;
103+
common_file_info first_shard_file;
104+
common_file_info mmproj_file;
105+
106+
for (const auto & file : files) {
107+
if (file.is_dir) {
108+
const std::string child_name = name.empty() ? file.name : name + "/" + file.name;
109+
scan_subdir(file.path, child_name);
110+
continue;
111+
}
112+
113+
if (string_ends_with(file.name, ".gguf")) {
114+
if (file.name.find("mmproj") != std::string::npos) {
115+
mmproj_file = file;
116+
} else if (file.name.find("-00001-of-") != std::string::npos) {
117+
first_shard_file = file;
118+
} else {
119+
model_file = file;
120+
}
121+
}
122+
}
123+
124+
// Convert absolute paths to relative
125+
std::string model_path = first_shard_file.path.empty() ? model_file.path : first_shard_file.path;
126+
if (!model_path.empty()) {
127+
std::error_code ec;
128+
auto rel_path = std::filesystem::relative(model_path, dir, ec);
129+
if (!ec) {
130+
model_path = rel_path.generic_string();
131+
}
132+
}
133+
134+
std::string mmproj_path = mmproj_file.path;
135+
if (!mmproj_path.empty()) {
136+
std::error_code ec;
137+
auto rel_path = std::filesystem::relative(mmproj_path, dir, ec);
138+
if (!ec) {
139+
mmproj_path = rel_path.generic_string();
103140
}
104141
}
105-
}
106-
// single file model
107-
local_model model{
108-
/* name */ name,
109-
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
110-
/* path_mmproj */ mmproj_file.path // can be empty
111-
};
112-
if (!model.path.empty()) {
113-
models.push_back(model);
114-
}
115-
};
116142

117-
auto files = fs_list(dir, true);
118-
for (const auto & file : files) {
119-
if (file.is_dir) {
120-
scan_subdir(file.path, file.name);
121-
} else if (string_ends_with(file.name, ".gguf")) {
122-
// single file model
123-
std::string name = file.name;
124-
string_replace_all(name, ".gguf", "");
125143
local_model model{
126-
/* name */ name,
127-
/* path */ file.path,
128-
/* path_mmproj */ ""
144+
/* name */ name,
145+
/* display_name */ sanitize_model_name(name),
146+
/* path */ model_path,
147+
/* path_mmproj */ mmproj_path // can be empty
129148
};
130-
models.push_back(model);
149+
if (!model.path.empty()) {
150+
models.push_back(model);
151+
}
152+
};
153+
154+
scan_subdir(dir, "");
155+
156+
// when scanning the root, the name is empty, so adjust names for models directly under models_dir
157+
for (auto & model : models) {
158+
if (model.name.empty() && !model.path.empty()) {
159+
model.name = std::filesystem::path(model.path).filename().string();
160+
string_replace_all(model.name, ".gguf", "");
161+
model.display_name = sanitize_model_name(model.name);
131162
}
132163
}
133164
return models;
@@ -138,8 +169,8 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
138169
//
139170

140171

141-
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142-
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
172+
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir)
173+
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)), models_dir(models_dir) {
143174
if (!presets_path.empty()) {
144175
presets = common_presets_load(presets_path, ctx_params);
145176
SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str());
@@ -156,6 +187,7 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
156187
if (env == "LLAMA_ARG_PORT" ||
157188
env == "LLAMA_ARG_HOST" ||
158189
env == "LLAMA_ARG_ALIAS" ||
190+
env == "LLAMA_ARG_MODELS_PRESET" ||
159191
env == "LLAMA_ARG_API_KEY" ||
160192
env == "LLAMA_ARG_MODELS_DIR" ||
161193
env == "LLAMA_ARG_MODELS_MAX" ||
@@ -200,9 +232,17 @@ void server_presets::render_args(server_model_meta & meta) {
200232
if (meta.in_cache) {
201233
preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name;
202234
} else {
203-
preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path;
235+
std::string model_path = meta.path;
236+
if (!models_dir.empty() && !std::filesystem::path(model_path).is_absolute()) {
237+
model_path = models_dir + "/" + model_path;
238+
}
239+
preset.options[control_args["LLAMA_ARG_MODEL"]] = model_path;
204240
if (!meta.path_mmproj.empty()) {
205-
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj;
241+
std::string mmproj_path = meta.path_mmproj;
242+
if (!models_dir.empty() && !std::filesystem::path(mmproj_path).is_absolute()) {
243+
mmproj_path = models_dir + "/" + mmproj_path;
244+
}
245+
preset.options[control_args["LLAMA_ARG_MMPROJ"]] = mmproj_path;
206246
}
207247
}
208248
meta.args = preset.to_args();
@@ -216,20 +256,21 @@ server_models::server_models(
216256
const common_params & params,
217257
int argc,
218258
char ** argv,
219-
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
259+
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset, params.models_dir) {
220260
for (int i = 0; i < argc; i++) {
221261
base_args.push_back(std::string(argv[i]));
222262
}
223263
for (char ** env = envp; *env != nullptr; env++) {
224264
base_env.push_back(std::string(*env));
225265
}
226266
GGML_ASSERT(!base_args.empty());
227-
// set binary path
267+
// Save binary path before base_args is modified by presets parsing
228268
try {
229-
base_args[0] = get_server_exec_path().string();
269+
server_binary_path = get_server_exec_path().string();
230270
} catch (const std::exception & e) {
231271
LOG_WRN("failed to get server executable path: %s\n", e.what());
232-
LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str());
272+
LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]);
273+
server_binary_path = std::string(argv[0]);
233274
}
234275
// TODO: allow refreshing cached model list
235276
// add cached models
@@ -258,13 +299,18 @@ server_models::server_models(
258299
if (!params.models_dir.empty()) {
259300
auto local_models = list_local_models(params.models_dir);
260301
for (const auto & model : local_models) {
261-
if (mapping.find(model.name) != mapping.end()) {
302+
const std::string name = model.display_name;
303+
if (mapping.find(name) != mapping.end()) {
262304
// already exists in cached models, skip
263305
continue;
264306
}
307+
auto preset = presets.get_preset(name);
308+
if (preset.name.empty() && name != model.name) {
309+
preset = presets.get_preset(model.name);
310+
}
265311
server_model_meta meta{
266-
/* preset */ presets.get_preset(model.name),
267-
/* name */ model.name,
312+
/* preset */ preset,
313+
/* name */ name,
268314
/* path */ model.path,
269315
/* path_mmproj */ model.path_mmproj,
270316
/* in_cache */ false,
@@ -445,11 +491,15 @@ void server_models::load(const std::string & name) {
445491
throw std::runtime_error("failed to get a port number");
446492
}
447493

494+
presets.render_args(inst.meta);
495+
448496
inst.subproc = std::make_shared<subprocess_s>();
449497
{
450498
SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);
451499

452500
std::vector<std::string> child_args = inst.meta.args; // copy
501+
// Insert binary path as argv[0]
502+
child_args.insert(child_args.begin(), server_binary_path);
453503
std::vector<std::string> child_env = base_env; // copy
454504
child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));
455505

tools/server/server-models.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,8 +75,9 @@ struct server_presets {
7575
common_params_context ctx_params;
7676
std::map<common_arg, std::string> base_args;
7777
std::map<std::string, common_arg> control_args; // args reserved for server control
78+
std::string models_dir;
7879

79-
server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir);
80+
server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir);
8081
common_preset get_preset(const std::string & name);
8182
void render_args(server_model_meta & meta);
8283
};
@@ -99,6 +100,7 @@ struct server_models {
99100
common_params base_params;
100101
std::vector<std::string> base_args;
101102
std::vector<std::string> base_env;
103+
std::string server_binary_path;
102104

103105
server_presets presets;
104106

0 commit comments

Comments
 (0)