diff --git a/common/arg.cpp b/common/arg.cpp
index 98baac4c14da2..bc7004d080d8b 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -496,8 +496,12 @@ static bool common_download_model(
         LOG_ERR("%s: invalid model url\n", __func__);
         return false;
     }
+    if (model.paths.size() != 1) {
+        LOG_ERR("%s: model url can only be specified with one path\n", __func__);
+        return false;
+    }
 
-    if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
+    if (!common_download_file_single(model.url, model.paths[0], bearer_token, offline)) {
         return false;
     }
 
@@ -508,9 +512,9 @@ static bool common_download_model(
             /*.no_alloc = */ true,
             /*.ctx      = */ NULL,
         };
-        auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
+        auto * ctx_gguf = gguf_init_from_file(model.paths[0].c_str(), gguf_params);
         if (!ctx_gguf) {
-            LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
+            LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.paths[0].c_str());
             return false;
         }
 
@@ -529,8 +533,8 @@ static bool common_download_model(
     // Verify the first split file format
     // and extract split URL and PATH prefixes
     {
-        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) {
-            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split);
+        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.paths[0].c_str(), 0, n_split)) {
+            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.paths[0].c_str(), n_split);
             return false;
         }
 
@@ -548,7 +552,7 @@ static bool common_download_model(
             char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
             llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);
 
-            if (std::string(split_path) == model.path) {
+            if (std::string(split_path) == model.paths[0]) {
                 continue; // skip the already downloaded file
             }
 
@@ -798,7 +802,7 @@ static handle_model_result common_params_handle_model(
     if (!model.hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (model.hf_file.empty()) {
-            if (model.path.empty()) {
+            if (model.paths.empty()) {
                 auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
                 if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
                     exit(1); // built without CURL, error message already printed
@@ -811,30 +815,30 @@ static handle_model_result common_params_handle_model(
                     result.mmproj.hf_file = auto_detected.mmprojFile;
                 }
             } else {
-                model.hf_file = model.path;
+                model.hf_file = model.paths[0];
             }
         }
 
         std::string model_endpoint = get_model_endpoint();
         model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
         // make sure model path is present (for caching purposes)
-        if (model.path.empty()) {
+        if (model.paths.empty()) {
            // this is to avoid different repo having same file name, or same file name in different subdirs
            std::string filename = model.hf_repo + "_" + model.hf_file;
            // to make sure we don't have any slashes in the filename
            string_replace_all(filename, "/", "_");
-            model.path = fs_get_cache_file(filename);
+            model.paths.push_back(fs_get_cache_file(filename));
        }
 
    } else if (!model.url.empty()) {
-        if (model.path.empty()) {
+        if (model.paths.empty()) {
            auto f = string_split<std::string>(model.url, '#').front();
            f = string_split<std::string>(f, '?').front();
-            model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
+            model.paths.push_back(fs_get_cache_file(string_split<std::string>(f, '/').back()));
        }
 
-    } else if (model.path.empty()) {
-        model.path = model_path_default;
+    } else if (model.paths.empty() && !model_path_default.empty()) {
+        model.paths.push_back(model_path_default);
    }
 }
 
@@ -986,7 +990,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
     if (params.no_mmproj) {
         params.mmproj = {};
-    } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+    } else if (res.found_mmproj && params.mmproj.paths.empty() && params.mmproj.url.empty()) {
         // optionally, handle mmproj model when -hf is specified
         params.mmproj = res.mmproj;
     }
@@ -2285,7 +2289,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         "path to a multimodal projector file. see tools/mtmd/README.md\n"
         "note: if -hf is used, this argument can be omitted",
         [](common_params & params, const std::string & value) {
-            params.mmproj.path = value;
+            if (params.mmproj.paths.empty()) {
+                params.mmproj.paths.push_back(value);
+            } else {
+                params.mmproj.paths[0] = value;
+            }
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
     add_opt(common_arg(
@@ -2597,7 +2605,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
         ),
         [](common_params & params, const std::string & value) {
-            params.model.path = value;
+            params.model.paths.push_back(value);
         }
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL"));
     add_opt(common_arg(
@@ -3330,7 +3338,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"-md", "--model-draft"}, "FNAME",
         "draft model for speculative decoding (default: unused)",
         [](common_params & params, const std::string & value) {
-            params.speculative.model.path = value;
+            params.speculative.model.paths.push_back(value);
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
     add_opt(common_arg(
@@ -3371,7 +3379,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"-mv", "--model-vocoder"}, "FNAME",
         "vocoder model for audio generation (default: unused)",
         [](common_params & params, const std::string & value) {
-            params.vocoder.model.path = value;
+            params.vocoder.model.paths.push_back(value);
         }
     ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
diff --git a/common/common.cpp b/common/common.cpp
index 67dd5404fff90..03759ca66d579 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -912,10 +912,24 @@ std::string fs_get_cache_file(const std::string & filename) {
 struct common_init_result common_init_from_params(common_params & params) {
     common_init_result iparams;
     auto mparams = common_model_params_to_llama(params);
+    llama_model * model = NULL;
+
+    if (params.model.paths.empty()) {
+        LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
+        return iparams;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
+    }
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
     if (model == NULL) {
-        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.paths[0].c_str());
         return iparams;
     }
 
@@ -925,7 +939,7 @@ struct common_init_result common_init_from_params(common_params & params) {
 
     llama_context * lctx = llama_init_from_model(model, cparams);
     if (lctx == NULL) {
-        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
+        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.paths[0].c_str());
         llama_model_free(model);
         return iparams;
     }
diff --git a/common/common.h b/common/common.h
index 75596e6b32979..be40d07d1dde7 100644
--- a/common/common.h
+++ b/common/common.h
@@ -190,10 +190,10 @@ struct common_params_sampling {
 };
 
 struct common_params_model {
-    std::string path = ""; // model local path // NOLINT
-    std::string url = ""; // model url to download // NOLINT
-    std::string hf_repo = ""; // HF repo // NOLINT
-    std::string hf_file = ""; // HF file // NOLINT
+    std::vector<std::string> paths = {}; // model local path // NOLINT
+    std::string url = ""; // model url to download // NOLINT
+    std::string hf_repo = ""; // HF repo // NOLINT
+    std::string hf_file = ""; // HF file // NOLINT
 };
 
 struct common_params_speculative {
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 1a5de5928a526..485f4853f3d47 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -41,7 +41,20 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
+    llama_model * model = NULL;
+    if (params.model.paths.empty()) {
+        LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
+    }
 
     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);
diff --git a/examples/diffusion/diffusion-cli.cpp b/examples/diffusion/diffusion-cli.cpp
index 8431dcea8fe2a..de1cc9b2be7f4 100644
--- a/examples/diffusion/diffusion-cli.cpp
+++ b/examples/diffusion/diffusion-cli.cpp
@@ -548,9 +548,23 @@ int main(int argc, char ** argv) {
     model_params.use_mlock = params.use_mlock;
     model_params.check_tensors = params.check_tensors;
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
+    llama_model * model = NULL;
+    if (params.model.paths.empty()) {
+        LOG_ERR("error: failed to load model 'model path not specified'\n");
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
+    }
+
     if (!model) {
-        LOG_ERR("error: failed to load model '%s'\n", params.model.path.c_str());
+        LOG_ERR("error: failed to load model '%s'\n", params.model.paths[0].c_str());
         return 1;
     }
 
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp
index bdab052c3390f..cde0a727e670d 100644
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -168,7 +168,20 @@ int main(int argc, char * argv[]) {
 
     llama_backend_init();
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
+    llama_model * model = NULL;
+    if (params.model.paths.empty()) {
+        fprintf(stderr, "failed to load model 'model path not specified'\n");
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
+    }
 
     // create generation context
     llama_context * ctx = llama_init_from_model(model, cparams);
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index e48f48fc32216..2f4f5f871d3e6 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -495,7 +495,7 @@ int main(int argc, char ** argv) {
         params.prompt_file = "used built-in defaults";
     }
     LOG_INF("External prompt file: \033[32m%s\033[0m\n", params.prompt_file.c_str());
-    LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.path.c_str());
+    LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.paths[0].c_str());
 
     LOG_INF("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
     LOG_INF("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index 8a4faa383bf32..f100eb3414351 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -64,7 +64,20 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
+    llama_model * model;
+    if (params.model.paths.empty()) {
+        LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
+    }
 
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
diff --git a/examples/speculative-simple/speculative-simple.cpp b/examples/speculative-simple/speculative-simple.cpp
index a8e53f28eb597..65aa7742102df 100644
--- a/examples/speculative-simple/speculative-simple.cpp
+++ b/examples/speculative-simple/speculative-simple.cpp
@@ -24,7 +24,7 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.speculative.model.path.empty()) {
+    if (params.speculative.model.paths.empty()) {
         LOG_ERR("%s: --model-draft is required\n", __func__);
         return 1;
     }
@@ -67,7 +67,7 @@ int main(int argc, char ** argv) {
     ctx_dft = llama_init_dft.context.get();
 
     if (!common_speculative_are_compatible(ctx_tgt, ctx_dft)) {
-        LOG_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params.speculative.model.path.c_str(), params.model.path.c_str());
+        LOG_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params.speculative.model.paths[0].c_str(), params.model.paths[0].c_str());
     }
 
     // Tokenize the prompt
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 8449406a6d27a..73e1b559667e2 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -46,7 +46,7 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.speculative.model.path.empty()) {
+    if (params.speculative.model.paths.empty()) {
         LOG_ERR("%s: --model-draft is required\n", __func__);
         return 1;
     }
diff --git a/src/llama.cpp b/src/llama.cpp
index 34906cdb62844..8e24f7a21f579 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -258,6 +258,7 @@ struct llama_model * llama_model_load_from_splits(
         return nullptr;
     }
     for (size_t i = 0; i < n_paths; ++i) {
+        LLAMA_LOG_INFO("%s: splits[%zu] = '%s'\n", __func__, i, paths[i]);
         splits.push_back(paths[i]);
     }
     return llama_model_load_from_file_impl(splits.front(), splits, params);
diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp
index e2836ca4814b4..fd331756a6bf2 100644
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@@ -77,7 +77,7 @@ int main(void) {
 
     argv = {"binary_name", "-m", "model_file.gguf"};
     assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.model.path == "model_file.gguf");
+    assert(params.model.paths[0] == "model_file.gguf");
 
     argv = {"binary_name", "-t", "1234"};
     assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
@@ -89,7 +89,7 @@ int main(void) {
 
     argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
     assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.model.path == "abc.gguf");
+    assert(params.model.paths[0] == "abc.gguf");
     assert(params.n_predict == 6789);
     assert(params.n_batch == 9090);
 
@@ -112,7 +112,7 @@ int main(void) {
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name"};
     assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.model.path == "blah.gguf");
+    assert(params.model.paths[0] == "blah.gguf");
     assert(params.cpuparams.n_threads == 1010);
 
 
@@ -122,7 +122,7 @@ int main(void) {
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name", "-m", "overwritten.gguf"};
     assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.model.path == "overwritten.gguf");
+    assert(params.model.paths[0] == "overwritten.gguf");
     assert(params.cpuparams.n_threads == 1010);
 #endif // _WIN32
 
diff --git a/tests/test-thread-safety.cpp b/tests/test-thread-safety.cpp
index 853495b00d9d2..cbe52505be078 100644
--- a/tests/test-thread-safety.cpp
+++ b/tests/test-thread-safety.cpp
@@ -66,9 +66,23 @@ int main(int argc, char ** argv) {
         mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;;
     }
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
+    llama_model * model = NULL;
+    if (params.model.paths.empty()) {
+        LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
+    }
+
     if (model == NULL) {
-        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.paths[0].c_str());
         return 1;
     }
 
diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp
index 03628f74b2880..d1d164b392c70 100644
--- a/tools/batched-bench/batched-bench.cpp
+++ b/tools/batched-bench/batched-bench.cpp
@@ -38,7 +38,20 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
+    llama_model * model = NULL;
+    if (params.model.paths.empty()) {
+        fprintf(stderr, "%s: failed to load model 'model path not specified'\n", __func__);
+        return 1;
+    } else if (params.model.paths.size() == 1) {
+        model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
+    } else {
+        std::vector<const char *> paths;
+        paths.reserve(params.model.paths.size());
+        for (const auto & path : params.model.paths) {
+            paths.push_back(path.c_str());
+        }
+        model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
+    }
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
diff --git a/tools/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp
index f038019b007b4..7e2d290dc584b 100644
--- a/tools/export-lora/export-lora.cpp
+++ b/tools/export-lora/export-lora.cpp
@@ -419,9 +419,14 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.model.paths.size() != 1) {
+        fprintf(stderr, "exactly one model path needs to be specified, got %zu\n", params.model.paths.size());
+        exit(EXIT_FAILURE);
+    }
+
     g_verbose = (params.verbosity > 1);
     try {
-        lora_merge_ctx ctx(params.model.path, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
+        lora_merge_ctx ctx(params.model.paths[0], params.lora_adapters, params.out_file, params.cpuparams.n_threads);
         ctx.run_merge();
     } catch (const std::exception & err) {
         fprintf(stderr, "%s\n", err.what());
diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp
index 599e682e0f894..bd04602c76449 100644
--- a/tools/mtmd/mtmd-cli.cpp
+++ b/tools/mtmd/mtmd-cli.cpp
@@ -126,7 +126,7 @@ struct mtmd_cli_context {
     }
 
     void init_vision_context(common_params & params) {
-        const char * clip_path = params.mmproj.path.c_str();
+        const char * clip_path = params.mmproj.paths[0].c_str();
         mtmd_context_params mparams = mtmd_context_params_default();
         mparams.use_gpu = params.mmproj_use_gpu;
         mparams.print_timings = true;
@@ -257,14 +257,20 @@ int main(int argc, char ** argv) {
 
     common_init();
 
-    if (params.mmproj.path.empty()) {
+    if (params.mmproj.paths.empty()) {
         show_additional_info(argc, argv);
         LOG_ERR("ERR: Missing --mmproj argument\n");
         return 1;
     }
 
+    if (params.mmproj.paths.size() > 1) {
+        show_additional_info(argc, argv);
+        LOG_ERR("ERR: Only one --mmproj argument is supported\n");
+        return 1;
+    }
+
     mtmd_cli_context ctx(params);
-    LOG("%s: loading model: %s\n", __func__, params.model.path.c_str());
+    LOG("%s: loading model: %s\n", __func__, params.model.paths[0].c_str());
 
     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
 
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 8578d49e0394b..0c699ec59644a 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -1991,7 +1991,12 @@ struct server_context {
     }
 
     bool load_model(const common_params & params) {
-        SRV_INF("loading model '%s'\n", params.model.path.c_str());
+        if (params.model.paths.empty()) {
+            SRV_ERR("%s: no model path(s) specified\n", __func__);
+            return false;
+        }
+
+        SRV_INF("loading model '%s'\n", params.model.paths[0].c_str());
 
         params_base = params;
 
@@ -2001,7 +2006,7 @@ struct server_context {
 
         ctx = llama_init.context.get();
 
         if (model == nullptr) {
-            SRV_ERR("failed to load model, '%s'\n", params_base.model.path.c_str());
+            SRV_ERR("failed to load model, '%s'\n", params_base.model.paths[0].c_str());
             return false;
         }
@@ -2011,8 +2016,13 @@ struct server_context {
 
         add_bos_token = llama_vocab_get_add_bos(vocab);
 
-        if (!params_base.speculative.model.path.empty() || !params_base.speculative.model.hf_repo.empty()) {
-            SRV_INF("loading draft model '%s'\n", params_base.speculative.model.path.c_str());
+        if (!params_base.speculative.model.paths.empty() || !params_base.speculative.model.hf_repo.empty()) {
+            if (params_base.speculative.model.paths.empty()) {
+                SRV_ERR("%s: no speculative model path(s) specified\n", __func__);
+                return false;
+            }
+
+            SRV_INF("loading draft model '%s'\n", params_base.speculative.model.paths[0].c_str());
 
             auto params_dft = params_base;
 
@@ -2033,13 +2043,13 @@ struct server_context {
             model_dft = llama_init_dft.model.get();
 
            if (model_dft == nullptr) {
-                SRV_ERR("failed to load draft model, '%s'\n", params_base.speculative.model.path.c_str());
+                SRV_ERR("failed to load draft model, '%s'\n", params_base.speculative.model.paths[0].c_str());
                 return false;
             }
 
             vocab_dft_compatible = common_speculative_are_compatible(ctx, llama_init_dft.context.get());
             if (!vocab_dft_compatible) {
-                SRV_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params_base.speculative.model.path.c_str(), params_base.model.path.c_str());
+                SRV_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params_base.speculative.model.paths[0].c_str(), params_base.model.paths[0].c_str());
             }
 
             const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
@@ -2060,8 +2070,12 @@ struct server_context {
             chat_templates = common_chat_templates_init(model, "chatml");
         }
 
-        std::string & mmproj_path = params_base.mmproj.path;
-        if (!mmproj_path.empty()) {
+        if (!params_base.mmproj.paths.empty()) {
+            if (params_base.mmproj.paths.size() != 1) {
+                SRV_ERR("%s: only one mmproj path can be specified\n", __func__);
+                return false;
+            }
+            std::string & mmproj_path = params_base.mmproj.paths[0];
             mtmd_context_params mparams = mtmd_context_params_default();
             mparams.use_gpu = params_base.mmproj_use_gpu;
             mparams.print_timings = false;
@@ -2084,7 +2098,7 @@ struct server_context {
                 SRV_WRN("%s\n", "cache_reuse is not supported by multimodal, it will be disabled");
             }
 
-            if (!params_base.speculative.model.path.empty()) {
+            if (!params_base.speculative.model.paths.empty()) {
                 SRV_ERR("%s\n", "err: speculative decode is not supported by multimodal");
                 return false;
             }
@@ -4246,7 +4260,7 @@ int main(int argc, char ** argv) {
         json data = {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots", ctx_server.params_base.n_parallel },
-            { "model_path", ctx_server.params_base.model.path },
+            { "model_path", ctx_server.params_base.model.paths[0] },
             { "modalities", json{
                 {"vision", ctx_server.oai_parser_opt.allow_image},
                 {"audio", ctx_server.oai_parser_opt.allow_audio},
@@ -4608,8 +4622,8 @@ int main(int argc, char ** argv) {
         json models = {
             {"models", {
                 {
-                    {"name", params.model_alias.empty() ? params.model.path : params.model_alias},
-                    {"model", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"name", params.model_alias.empty() ? params.model.paths[0] : params.model_alias},
+                    {"model", params.model_alias.empty() ? params.model.paths[0] : params.model_alias},
                     {"modified_at", ""},
                     {"size", ""},
                     {"digest", ""}, // dummy value, llama.cpp does not support managing model file's hash
@@ -4631,7 +4645,7 @@ int main(int argc, char ** argv) {
             {"object", "list"},
            {"data", {
                 {
-                    {"id", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"id", params.model_alias.empty() ? params.model.paths[0] : params.model_alias},
                     {"object", "model"},
                     {"created", std::time(0)},
                     {"owned_by", "llamacpp"},