Skip to content

Commit 67c1990

Browse files
llama-router: add --import-dir for custom model collections
New CLI flag --import-dir <path> recursively scans local directories and imports GGUF models in the manual state (spawned on demand only).

Features:
- Smart mmproj detection: mmproj files are not imported as standalone models
- Auto-associates an mmproj with the models in the same directory
- Priority: BF16 > F16 > F32 when multiple mmproj variants exist
- All quants of the same model share the same prioritized mmproj
- Idempotent: won't duplicate existing models on re-import
- Manifest-optional: works without HF manifests for local collections

Fixes:
- Robust manifest handling: no crash if the manifest JSON is missing
- PATH binary check: only validates paths with separators

Example directory structure:

/mnt/models/
├─ unsloth/
│  ├─ Qwen3-VL-32B-Instruct-GGUF/
│  │  ├─ Qwen3-VL-32B-Instruct-Q4_K_M.gguf ─┐
│  │  ├─ Qwen3-VL-32B-Instruct-Q5_K_M.gguf ─┼─> all use mmproj-BF16.gguf
│  │  ├─ Qwen3-VL-32B-Instruct-Q6_K.gguf   ─┘
│  │  ├─ mmproj-BF16.gguf   <- priority 1 (selected)
│  │  ├─ mmproj-F16.gguf    <- priority 2
│  │  └─ mmproj-F32.gguf    <- priority 3
│  └─ DeepSeek-R1-Distill-Qwen-32B-GGUF/
│     ├─ DeepSeek-R1-Distill-Qwen-32B-Q5_K_M.gguf
│     └─ DeepSeek-R1-Distill-Qwen-32B-Q6_K.gguf
└─ bartowski/
   └─ Valkyrie-49B-v2-GGUF/
      ├─ Valkyrie-49B-v2-Q4_K_M.gguf
      └─ Valkyrie-49B-v2-IQ4_NL.gguf

Usage:
  llama-router --import-dir /mnt/models/
  llama-router --import-dir ~/my-gguf-collection

All imported models are set to the manual state (never auto-removed by rescan).
1 parent dda9dc6 commit 67c1990

File tree

6 files changed

+137
-19
lines changed

6 files changed

+137
-19
lines changed

tools/router/router-admin.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ void register_admin_routes(httplib::Server & server, RouterApp & app, const std:
3131
if (!authorize_admin(app.get_config(), req, res)) {
3232
return;
3333
}
34-
LOG_INF("Reloading router application: stopping and auto-starting models\n");
34+
LOG_INF("Reloading router application: stopping managed models\n");
3535
app.stop_all();
36-
app.start_auto_models();
3736
res.set_content("{\"status\":\"reloaded\"}", "application/json");
3837
});
3938

tools/router/router-app.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,6 @@ SpawnConfig RouterApp::get_spawn_config(const std::string & model_name) {
3030
return resolve_spawn_config(it->second);
3131
}
3232

33-
void RouterApp::start_auto_models() {
34-
for (const auto & model : config.models) {
35-
if (model.state == "auto") {
36-
std::string err;
37-
if (!ensure_running(model.name, err)) {
38-
LOG_WRN("auto-start for %s failed: %s\n", model.name.c_str(), err.c_str());
39-
} else {
40-
LOG_INF("auto-started %s\n", model.name.c_str());
41-
}
42-
}
43-
}
44-
}
45-
4633
bool RouterApp::ensure_running(const std::string & model_name, std::string & error) {
4734
std::lock_guard<std::mutex> lock(mutex);
4835
auto it_cfg = model_lookup.find(model_name);

tools/router/router-app.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ class RouterApp {
1313
explicit RouterApp(RouterConfig cfg);
1414
~RouterApp();
1515

16-
void start_auto_models();
1716
bool ensure_running(const std::string & model_name, std::string & error);
1817
std::string upstream_for(const std::string & model_name);
1918
std::string get_last_spawned_model();

tools/router/router-scanner.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,80 @@ std::vector<ModelConfig> scan_default_models() {
152152
LOG_INF("Model scanner found %zu candidates in %s\n", models.size(), cache_dir.c_str());
153153
return models;
154154
}
155+
156+
// Picks the multimodal projector (mmproj) GGUF file to pair with the models in `dir`.
//
// Only regular files directly inside `dir` are examined (no recursion). A file
// qualifies when its lower-cased name contains "mmproj" and ends with one of the
// precision suffixes listed below; BF16 is preferred over F16, which is preferred
// over F32. When several files share the best precision, the first one produced by
// the directory iteration wins (that order is unspecified by the standard).
//
// Returns the path of the selected file, or an empty string when nothing qualifies
// or the directory cannot be read. Filesystem queries use the std::error_code
// overloads, so an unreadable directory or entry is skipped instead of throwing.
static std::string find_mmproj_in_dir(const std::filesystem::path & dir) {
    // Ordered best-first. "bf16.gguf" must stay ahead of "f16.gguf": any name ending
    // in "bf16.gguf" also ends in "f16.gguf", so the first matching suffix sets the rank.
    static const std::vector<std::string> priorities = {"bf16.gguf", "f16.gguf", "f32.gguf"};

    std::string best_path;
    size_t      best_rank = priorities.size();  // == size() means "nothing found yet"

    std::error_code ec;
    std::filesystem::directory_iterator       it(dir, ec);
    const std::filesystem::directory_iterator end;

    // Single pass over the directory; increment(ec) reports errors through `ec`
    // where operator++ would throw.
    for (; !ec && it != end; it.increment(ec)) {
        std::error_code entry_ec;
        if (!it->is_regular_file(entry_ec) || entry_ec) {
            continue;
        }

        std::string filename = it->path().filename().string();
        std::transform(filename.begin(), filename.end(), filename.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        if (filename.find("mmproj") == std::string::npos) {
            continue;
        }

        // Only ranks strictly better than the current best can replace it, so the
        // first file seen at a given precision is kept.
        for (size_t rank = 0; rank < best_rank; ++rank) {
            const std::string & suffix = priorities[rank];
            const bool has_suffix =
                filename.size() >= suffix.size() &&
                filename.compare(filename.size() - suffix.size(), suffix.size(), suffix) == 0;
            if (has_suffix) {
                best_rank = rank;
                best_path = it->path().string();
                break;
            }
        }

        if (best_rank == 0) {
            break;  // top priority found, nothing can beat it
        }
    }

    return best_path;
}
183+
184+
std::vector<ModelConfig> scan_custom_dir(const std::string & path, const std::string & state) {
185+
std::vector<ModelConfig> models;
186+
187+
std::error_code ec;
188+
if (!std::filesystem::exists(path, ec) || !std::filesystem::is_directory(path, ec) || ec) {
189+
return models;
190+
}
191+
192+
std::unordered_set<std::string> seen;
193+
194+
for (std::filesystem::recursive_directory_iterator it(path, ec), end; it != end && !ec; ++it) {
195+
if (!it->is_regular_file()) {
196+
continue;
197+
}
198+
std::string ext = it->path().extension().string();
199+
std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
200+
if (ext != ".gguf") {
201+
continue;
202+
}
203+
204+
std::string full_path = it->path().string();
205+
if (seen.count(full_path)) {
206+
continue;
207+
}
208+
seen.insert(full_path);
209+
210+
std::string filename = it->path().filename().string();
211+
std::transform(filename.begin(), filename.end(), filename.begin(), [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
212+
213+
if (filename.find("mmproj") != std::string::npos) {
214+
continue;
215+
}
216+
217+
ModelConfig mc;
218+
mc.name = it->path().filename().string();
219+
mc.path = full_path;
220+
mc.state = state;
221+
if (auto mmproj_path = find_mmproj_in_dir(it->path().parent_path()); !mmproj_path.empty()) {
222+
mc.spawn = get_default_spawn();
223+
mc.spawn.command.push_back("--mmproj");
224+
mc.spawn.command.push_back(mmproj_path);
225+
}
226+
227+
models.push_back(std::move(mc));
228+
}
229+
230+
return models;
231+
}

tools/router/router-scanner.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#include "router-config.h"
44

5+
#include <string>
56
#include <vector>
67

78
std::vector<ModelConfig> scan_default_models();
9+
std::vector<ModelConfig> scan_custom_dir(const std::string & path, const std::string & state);

tools/router/router.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "router-app.h"
66
#include "router-config.h"
77
#include "router-constants.h"
8+
#include "router-scanner.h"
89
#include "router-admin.h"
910
#include "router-endpoints.h"
1011

@@ -17,6 +18,7 @@
1718
#include <cstdlib>
1819
#include <string>
1920
#include <thread>
21+
#include <unordered_set>
2022

2123
static std::atomic<bool> g_shutdown{false};
2224
static httplib::Server * g_server = nullptr;
@@ -33,6 +35,7 @@ struct CliOptions {
3335
std::string hf_repo;
3436
std::string hf_file;
3537
std::string config_path;
38+
std::string import_dir;
3639
};
3740

3841
static bool parse_cli(int argc, char ** argv, CliOptions & out) {
@@ -58,6 +61,12 @@ static bool parse_cli(int argc, char ** argv, CliOptions & out) {
5861
return false;
5962
}
6063
out.config_path = argv[++i];
64+
} else if (arg == "--import-dir") {
65+
if (i + 1 >= argc) {
66+
fprintf(stderr, "error: missing value for --import-dir\n");
67+
return false;
68+
}
69+
out.import_dir = argv[++i];
6170
} else {
6271
fprintf(stderr, "warning: unknown argument %s\n", arg.c_str());
6372
}
@@ -72,6 +81,7 @@ static void print_help() {
7281
printf(" --config <path> Override config path (default: ~/.config/llama.cpp/router-config.json)\n");
7382
printf(" -hf, -hfr, --hf-repo Hugging Face repository to download (format <user>/<repo>[:quant])\n");
7483
printf(" -hff, --hf-file Specific GGUF filename to fetch from repository\n");
84+
printf(" --import-dir <path> Recursively import GGUF models from directory\n");
7585
}
7686

7787
static bool handle_download(const CliOptions & opts) {
@@ -112,6 +122,47 @@ static bool handle_download(const CliOptions & opts) {
112122
return true;
113123
}
114124

125+
static bool handle_import(const CliOptions & opts, const std::string & config_path, int & exit_code) {
126+
if (opts.import_dir.empty()) {
127+
return false;
128+
}
129+
130+
exit_code = 0;
131+
132+
const std::string import_dir = expand_user_path(opts.import_dir);
133+
auto scanned = scan_custom_dir(import_dir, "manual");
134+
135+
RouterConfig cfg;
136+
try {
137+
cfg = load_config(config_path);
138+
} catch (const std::exception & e) {
139+
fprintf(stderr, "%s\n", e.what());
140+
exit_code = 1;
141+
return true;
142+
}
143+
144+
std::unordered_set<std::string> existing_paths;
145+
for (const auto & model : cfg.models) {
146+
existing_paths.insert(expand_user_path(model.path));
147+
}
148+
149+
size_t added = 0;
150+
for (auto & model : scanned) {
151+
const auto expanded = expand_user_path(model.path);
152+
if (existing_paths.insert(expanded).second) {
153+
cfg.models.push_back(std::move(model));
154+
++added;
155+
}
156+
}
157+
158+
if (added > 0) {
159+
write_config_file(cfg, config_path);
160+
}
161+
162+
LOG_INF("Imported %zu models from %s\n", added, import_dir.c_str());
163+
return true;
164+
}
165+
115166
int main(int argc, char ** argv) {
116167
CliOptions cli;
117168
router_log_init();
@@ -127,12 +178,17 @@ int main(int argc, char ** argv) {
127178
return 0;
128179
}
129180

181+
std::string config_path = !cli.config_path.empty() ? expand_user_path(cli.config_path) : get_default_config_path();
182+
130183
if (handle_download(cli)) {
131184
LOG_INF("Download-only mode completed, exiting\n");
132185
return 0;
133186
}
134187

135-
std::string config_path = !cli.config_path.empty() ? expand_user_path(cli.config_path) : get_default_config_path();
188+
int import_exit_code = 0;
189+
if (handle_import(cli, config_path, import_exit_code)) {
190+
return import_exit_code;
191+
}
136192
LOG_INF("Loading router configuration from %s\n", config_path.c_str());
137193

138194
RouterConfig cfg;
@@ -148,8 +204,6 @@ int main(int argc, char ** argv) {
148204

149205
RouterApp app(cfg);
150206
LOG_INF("Initialized RouterApp with default spawn command size=%zu\n", cfg.default_spawn.command.size());
151-
app.start_auto_models();
152-
LOG_INF("Auto-start requested, last spawned model: %s\n", app.get_last_spawned_model().c_str());
153207

154208
httplib::Server server;
155209
g_server = &server;

0 commit comments

Comments
 (0)