77#include < cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
88#include < sheredom/subprocess.h>
99
10+ #include < cstdio>
1011#include < functional>
1112#include < algorithm>
1213#include < thread>
@@ -77,57 +78,87 @@ static std::filesystem::path get_server_exec_path() {
7778
// A GGUF model discovered on the local filesystem (under --models-dir).
struct local_model {
    std::string name;         // hierarchical name relative to the models dir, may contain '/'
    std::string display_name; // `name` with path separators replaced by '_' (flat identifier)
    std::string path;         // path to the model weights (first shard for split models)
    std::string path_mmproj;  // path to the multimodal projector file; empty if none
};
8385
// Make a model name safe to use as a flat identifier by replacing
// path separators ('/' and '\') with underscores.
static std::string sanitize_model_name(const std::string & name) {
    std::string out = name;
    std::replace(out.begin(), out.end(), '/',  '_');
    std::replace(out.begin(), out.end(), '\\', '_');
    return out;
}
92+
8493static std::vector<local_model> list_local_models (const std::string & dir) {
8594 if (!std::filesystem::exists (dir) || !std::filesystem::is_directory (dir)) {
8695 throw std::runtime_error (string_format (" error: '%s' does not exist or is not a directory\n " , dir.c_str ()));
8796 }
8897
8998 std::vector<local_model> models;
90- auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
91- auto files = fs_list (subdir_path, false );
92- common_file_info model_file;
93- common_file_info first_shard_file;
94- common_file_info mmproj_file;
95- for (const auto & file : files) {
96- if (string_ends_with (file.name , " .gguf" )) {
97- if (file.name .find (" mmproj" ) != std::string::npos) {
98- mmproj_file = file;
99- } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
100- first_shard_file = file;
101- } else {
102- model_file = file;
99+ std::function<void (const std::string &, const std::string &)> scan_subdir =
100+ [&](const std::string & subdir_path, const std::string & name) {
101+ auto files = fs_list (subdir_path, true ); // Need directories for recursion
102+ common_file_info model_file;
103+ common_file_info first_shard_file;
104+ common_file_info mmproj_file;
105+
106+ for (const auto & file : files) {
107+ if (file.is_dir ) {
108+ const std::string child_name = name.empty () ? file.name : name + " /" + file.name ;
109+ scan_subdir (file.path , child_name);
110+ continue ;
111+ }
112+
113+ if (string_ends_with (file.name , " .gguf" )) {
114+ if (file.name .find (" mmproj" ) != std::string::npos) {
115+ mmproj_file = file;
116+ } else if (file.name .find (" -00001-of-" ) != std::string::npos) {
117+ first_shard_file = file;
118+ } else {
119+ model_file = file;
120+ }
121+ }
122+ }
123+
124+ // Convert absolute paths to relative
125+ std::string model_path = first_shard_file.path .empty () ? model_file.path : first_shard_file.path ;
126+ if (!model_path.empty ()) {
127+ std::error_code ec;
128+ auto rel_path = std::filesystem::relative (model_path, dir, ec);
129+ if (!ec) {
130+ model_path = rel_path.generic_string ();
131+ }
132+ }
133+
134+ std::string mmproj_path = mmproj_file.path ;
135+ if (!mmproj_path.empty ()) {
136+ std::error_code ec;
137+ auto rel_path = std::filesystem::relative (mmproj_path, dir, ec);
138+ if (!ec) {
139+ mmproj_path = rel_path.generic_string ();
103140 }
104141 }
105- }
106- // single file model
107- local_model model{
108- /* name */ name,
109- /* path */ first_shard_file.path .empty () ? model_file.path : first_shard_file.path ,
110- /* path_mmproj */ mmproj_file.path // can be empty
111- };
112- if (!model.path .empty ()) {
113- models.push_back (model);
114- }
115- };
116142
117- auto files = fs_list (dir, true );
118- for (const auto & file : files) {
119- if (file.is_dir ) {
120- scan_subdir (file.path , file.name );
121- } else if (string_ends_with (file.name , " .gguf" )) {
122- // single file model
123- std::string name = file.name ;
124- string_replace_all (name, " .gguf" , " " );
125143 local_model model{
126- /* name */ name,
127- /* path */ file.path ,
128- /* path_mmproj */ " "
144+ /* name */ name,
145+ /* display_name */ sanitize_model_name (name),
146+ /* path */ model_path,
147+ /* path_mmproj */ mmproj_path // can be empty
129148 };
130- models.push_back (model);
149+ if (!model.path .empty ()) {
150+ models.push_back (model);
151+ }
152+ };
153+
154+ scan_subdir (dir, " " );
155+
156+ // when scanning the root, the name is empty, so adjust names for models directly under models_dir
157+ for (auto & model : models) {
158+ if (model.name .empty () && !model.path .empty ()) {
159+ model.name = std::filesystem::path (model.path ).filename ().string ();
160+ string_replace_all (model.name , " .gguf" , " " );
161+ model.display_name = sanitize_model_name (model.name );
131162 }
132163 }
133164 return models;
@@ -138,8 +169,8 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
138169//
139170
140171
141- server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142- : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
172+ server_presets::server_presets (int argc, char ** argv, common_params & base_params, const std::string & presets_path, const std::string & models_dir )
173+ : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)), models_dir(models_dir) {
143174 if (!presets_path.empty ()) {
144175 presets = common_presets_load (presets_path, ctx_params);
145176 SRV_INF (" Loaded %zu presets from %s\n " , presets.size (), presets_path.c_str ());
@@ -156,6 +187,7 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
156187 if (env == " LLAMA_ARG_PORT" ||
157188 env == " LLAMA_ARG_HOST" ||
158189 env == " LLAMA_ARG_ALIAS" ||
190+ env == " LLAMA_ARG_MODELS_PRESET" ||
159191 env == " LLAMA_ARG_API_KEY" ||
160192 env == " LLAMA_ARG_MODELS_DIR" ||
161193 env == " LLAMA_ARG_MODELS_MAX" ||
@@ -200,9 +232,17 @@ void server_presets::render_args(server_model_meta & meta) {
200232 if (meta.in_cache ) {
201233 preset.options [control_args[" LLAMA_ARG_HF_REPO" ]] = meta.name ;
202234 } else {
203- preset.options [control_args[" LLAMA_ARG_MODEL" ]] = meta.path ;
235+ std::string model_path = meta.path ;
236+ if (!models_dir.empty () && !std::filesystem::path (model_path).is_absolute ()) {
237+ model_path = models_dir + " /" + model_path;
238+ }
239+ preset.options [control_args[" LLAMA_ARG_MODEL" ]] = model_path;
204240 if (!meta.path_mmproj .empty ()) {
205- preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = meta.path_mmproj ;
241+ std::string mmproj_path = meta.path_mmproj ;
242+ if (!models_dir.empty () && !std::filesystem::path (mmproj_path).is_absolute ()) {
243+ mmproj_path = models_dir + " /" + mmproj_path;
244+ }
245+ preset.options [control_args[" LLAMA_ARG_MMPROJ" ]] = mmproj_path;
206246 }
207247 }
208248 meta.args = preset.to_args ();
@@ -216,20 +256,21 @@ server_models::server_models(
216256 const common_params & params,
217257 int argc,
218258 char ** argv,
219- char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
259+ char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset, params.models_dir ) {
220260 for (int i = 0 ; i < argc; i++) {
221261 base_args.push_back (std::string (argv[i]));
222262 }
223263 for (char ** env = envp; *env != nullptr ; env++) {
224264 base_env.push_back (std::string (*env));
225265 }
226266 GGML_ASSERT (!base_args.empty ());
227- // set binary path
267+ // Save binary path before base_args is modified by presets parsing
228268 try {
229- base_args[ 0 ] = get_server_exec_path ().string ();
269+ server_binary_path = get_server_exec_path ().string ();
230270 } catch (const std::exception & e) {
231271 LOG_WRN (" failed to get server executable path: %s\n " , e.what ());
232- LOG_WRN (" using original argv[0] as fallback: %s\n " , base_args[0 ].c_str ());
272+ LOG_WRN (" using original argv[0] as fallback: %s\n " , argv[0 ]);
273+ server_binary_path = std::string (argv[0 ]);
233274 }
234275 // TODO: allow refreshing cached model list
235276 // add cached models
@@ -258,13 +299,18 @@ server_models::server_models(
258299 if (!params.models_dir .empty ()) {
259300 auto local_models = list_local_models (params.models_dir );
260301 for (const auto & model : local_models) {
261- if (mapping.find (model.name ) != mapping.end ()) {
302+ const std::string name = model.display_name ;
303+ if (mapping.find (name) != mapping.end ()) {
262304 // already exists in cached models, skip
263305 continue ;
264306 }
307+ auto preset = presets.get_preset (name);
308+ if (preset.name .empty () && name != model.name ) {
309+ preset = presets.get_preset (model.name );
310+ }
265311 server_model_meta meta{
266- /* preset */ presets. get_preset (model. name ) ,
267- /* name */ model. name ,
312+ /* preset */ preset ,
313+ /* name */ name,
268314 /* path */ model.path ,
269315 /* path_mmproj */ model.path_mmproj ,
270316 /* in_cache */ false ,
@@ -445,11 +491,15 @@ void server_models::load(const std::string & name) {
445491 throw std::runtime_error (" failed to get a port number" );
446492 }
447493
494+ presets.render_args (inst.meta );
495+
448496 inst.subproc = std::make_shared<subprocess_s>();
449497 {
450498 SRV_INF (" spawning server instance with name=%s on port %d\n " , inst.meta .name .c_str (), inst.meta .port );
451499
452500 std::vector<std::string> child_args = inst.meta .args ; // copy
501+ // Insert binary path as argv[0]
502+ child_args.insert (child_args.begin (), server_binary_path);
453503 std::vector<std::string> child_env = base_env; // copy
454504 child_env.push_back (" LLAMA_SERVER_ROUTER_PORT=" + std::to_string (base_params.port ));
455505
0 commit comments