46 changes: 27 additions & 19 deletions common/arg.cpp
@@ -496,8 +496,12 @@ static bool common_download_model(
LOG_ERR("%s: invalid model url\n", __func__);
return false;
}
if (model.paths.size() != 1) {
LOG_ERR("%s: model url can only be specified with one path\n", __func__);
return false;
}

if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
if (!common_download_file_single(model.url, model.paths[0], bearer_token, offline)) {
return false;
}

@@ -508,9 +512,9 @@ static bool common_download_model(
/*.no_alloc = */ true,
/*.ctx = */ NULL,
};
auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
auto * ctx_gguf = gguf_init_from_file(model.paths[0].c_str(), gguf_params);
if (!ctx_gguf) {
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.paths[0].c_str());
return false;
}

@@ -529,8 +533,8 @@ static bool common_download_model(
// Verify the first split file format
// and extract split URL and PATH prefixes
{
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) {
LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split);
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.paths[0].c_str(), 0, n_split)) {
LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.paths[0].c_str(), n_split);
return false;
}

@@ -548,7 +552,7 @@ static bool common_download_model(
char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);

if (std::string(split_path) == model.path) {
if (std::string(split_path) == model.paths[0]) {
continue; // skip the already downloaded file
}

@@ -798,7 +802,7 @@ static handle_model_result common_params_handle_model(
if (!model.hf_repo.empty()) {
// short-hand to avoid specifying --hf-file -> default it to --model
if (model.hf_file.empty()) {
if (model.path.empty()) {
if (model.paths.empty()) {
auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
exit(1); // built without CURL, error message already printed
Expand All @@ -811,30 +815,30 @@ static handle_model_result common_params_handle_model(
result.mmproj.hf_file = auto_detected.mmprojFile;
}
} else {
model.hf_file = model.path;
model.hf_file = model.paths[0];
}
}

std::string model_endpoint = get_model_endpoint();
model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
// make sure model path is present (for caching purposes)
if (model.path.empty()) {
if (model.paths.empty()) {
// this is to avoid different repo having same file name, or same file name in different subdirs
std::string filename = model.hf_repo + "_" + model.hf_file;
// to make sure we don't have any slashes in the filename
string_replace_all(filename, "/", "_");
model.path = fs_get_cache_file(filename);
model.paths.push_back(fs_get_cache_file(filename));
}

} else if (!model.url.empty()) {
if (model.path.empty()) {
if (model.paths.empty()) {
auto f = string_split<std::string>(model.url, '#').front();
f = string_split<std::string>(f, '?').front();
model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
model.paths.push_back(fs_get_cache_file(string_split<std::string>(f, '/').back()));
}

} else if (model.path.empty()) {
model.path = model_path_default;
} else if (model.paths.empty() && !model_path_default.empty()) {
model.paths.push_back(model_path_default);
}
}

@@ -986,7 +990,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
if (params.no_mmproj) {
params.mmproj = {};
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
} else if (res.found_mmproj && params.mmproj.paths.empty() && params.mmproj.url.empty()) {
// optionally, handle mmproj model when -hf is specified
params.mmproj = res.mmproj;
}
@@ -2285,7 +2289,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"path to a multimodal projector file. see tools/mtmd/README.md\n"
"note: if -hf is used, this argument can be omitted",
[](common_params & params, const std::string & value) {
params.mmproj.path = value;
if (params.mmproj.paths.empty()) {
params.mmproj.paths.push_back(value);
} else {
params.mmproj.paths[0] = value;
}
}
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
add_opt(common_arg(
@@ -2597,7 +2605,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
),
[](common_params & params, const std::string & value) {
params.model.path = value;
params.model.paths.push_back(value);
}
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL"));
add_opt(common_arg(
@@ -3330,7 +3338,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"-md", "--model-draft"}, "FNAME",
"draft model for speculative decoding (default: unused)",
[](common_params & params, const std::string & value) {
params.speculative.model.path = value;
params.speculative.model.paths.push_back(value);
}
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
add_opt(common_arg(
@@ -3371,7 +3379,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"-mv", "--model-vocoder"}, "FNAME",
"vocoder model for audio generation (default: unused)",
[](common_params & params, const std::string & value) {
params.vocoder.model.path = value;
params.vocoder.model.paths.push_back(value);
}
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
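A note on the split handling in `common_download_model` above: `llama_split_prefix` recovers the shared file-name prefix from the first split, and `llama_split_path` rebuilds the name of each remaining split from that prefix. Below is a minimal sketch of that round trip; the model file name is made up, and the exact naming format of the output is left to `llama_split_path`.

```cpp
// Sketch only: the split file name below is hypothetical.
#include <cstdio>
#include "llama.h"

int main() {
    const char * first   = "models/ggml-model-q4_0-00001-of-00003.gguf";
    const int    n_split = 3;

    char prefix[512] = {0};
    if (!llama_split_prefix(prefix, sizeof(prefix), first, 0, n_split)) {
        fprintf(stderr, "unexpected split file name: %s\n", first);
        return 1;
    }

    // rebuild every split name from the shared prefix, as the download loop above does
    for (int idx = 0; idx < n_split; ++idx) {
        char split_path[512] = {0};
        llama_split_path(split_path, sizeof(split_path), prefix, idx, n_split);
        printf("split %d: %s\n", idx, split_path);
    }
    return 0;
}
```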
20 changes: 17 additions & 3 deletions common/common.cpp
@@ -912,10 +912,24 @@ std::string fs_get_cache_file(const std::string & filename) {
struct common_init_result common_init_from_params(common_params & params) {
common_init_result iparams;
auto mparams = common_model_params_to_llama(params);
llama_model * model = NULL;

if (params.model.paths.empty()) {
LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
return iparams;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
}

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
if (model == NULL) {
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.paths[0].c_str());
return iparams;
}

@@ -925,7 +939,7 @@ struct common_init_result common_init_from_params(common_params & params) {

llama_context * lctx = llama_init_from_model(model, cparams);
if (lctx == NULL) {
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.paths[0].c_str());
llama_model_free(model);
return iparams;
}
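The empty/single/multi dispatch added to `common_init_from_params` above is repeated almost verbatim in several of the examples and tests further down (batched, diffusion-cli, gritlm, passkey, test-thread-safety). A minimal sketch of that shared shape, written as a hypothetical helper that is not part of this PR:

```cpp
// Hypothetical helper (not in this PR): one path goes through
// llama_model_load_from_file, several paths (GGUF splits) go through
// llama_model_load_from_splits, and an empty list returns nullptr so the
// caller can report "model path not specified".
#include <string>
#include <vector>
#include "llama.h"

static llama_model * load_model_from_paths(const std::vector<std::string> & model_paths,
                                           const llama_model_params & mparams) {
    if (model_paths.empty()) {
        return nullptr;
    }
    if (model_paths.size() == 1) {
        return llama_model_load_from_file(model_paths[0].c_str(), mparams);
    }
    std::vector<const char *> paths;
    paths.reserve(model_paths.size());
    for (const auto & path : model_paths) {
        paths.push_back(path.c_str());
    }
    return llama_model_load_from_splits(paths.data(), paths.size(), mparams);
}
```

Factoring something like this into common/ would let the per-example copies below collapse to a single call each.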
8 changes: 4 additions & 4 deletions common/common.h
@@ -190,10 +190,10 @@ struct common_params_sampling {
};

struct common_params_model {
std::string path = ""; // model local path // NOLINT
std::string url = ""; // model url to download // NOLINT
std::string hf_repo = ""; // HF repo // NOLINT
std::string hf_file = ""; // HF file // NOLINT
std::vector<std::string> paths = {}; // model local path // NOLINT
std::string url = ""; // model url to download // NOLINT
std::string hf_repo = ""; // HF repo // NOLINT
std::string hf_file = ""; // HF file // NOLINT
};

struct common_params_speculative {
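With `path` replaced by the `paths` vector above, a single-file model keeps exactly one entry while a split model gets one entry per shard. A hedged fragment of how the field is intended to be filled; the `.gguf` file names are made up for illustration.

```cpp
// Illustration only; file names are hypothetical.
#include "common.h"

static common_params make_example_params() {
    common_params params;

    // split model: one entry per shard; common_init_from_params() routes a
    // multi-entry list through llama_model_load_from_splits()
    params.model.paths = {
        "models/llama-8x7b-00001-of-00002.gguf",
        "models/llama-8x7b-00002-of-00002.gguf",
    };

    // a single-file model keeps exactly one entry, matching the old `path`
    // field, e.g.:
    //   params.model.paths = { "models/llama-7b-q4_0.gguf" };
    return params;
}
```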
15 changes: 14 additions & 1 deletion examples/batched/batched.cpp
@@ -41,7 +41,20 @@ int main(int argc, char ** argv) {

llama_model_params model_params = common_model_params_to_llama(params);

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
llama_model * model = NULL;
if (params.model.paths.empty()) {
LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
return 1;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
}

if (model == NULL) {
LOG_ERR("%s: error: unable to load model\n" , __func__);
18 changes: 16 additions & 2 deletions examples/diffusion/diffusion-cli.cpp
@@ -548,9 +548,23 @@ int main(int argc, char ** argv) {
model_params.use_mlock = params.use_mlock;
model_params.check_tensors = params.check_tensors;

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
llama_model * model = NULL;
if (params.model.paths.empty()) {
LOG_ERR("error: failed to load model 'model path not specified'\n");
return 1;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
}

if (!model) {
LOG_ERR("error: failed to load model '%s'\n", params.model.path.c_str());
LOG_ERR("error: failed to load model '%s'\n", params.model.paths[0].c_str());
return 1;
}

15 changes: 14 additions & 1 deletion examples/gritlm/gritlm.cpp
@@ -168,7 +168,20 @@ int main(int argc, char * argv[]) {

llama_backend_init();

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
llama_model * model = NULL;
if (params.model.paths.empty()) {
fprintf(stderr, "failed to load model 'model path not specified'\n");
return 1;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
}

// create generation context
llama_context * ctx = llama_init_from_model(model, cparams);
2 changes: 1 addition & 1 deletion examples/parallel/parallel.cpp
@@ -495,7 +495,7 @@ int main(int argc, char ** argv) {
params.prompt_file = "used built-in defaults";
}
LOG_INF("External prompt file: \033[32m%s\033[0m\n", params.prompt_file.c_str());
LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.path.c_str());
LOG_INF("Model and path used: \033[32m%s\033[0m\n\n", params.model.paths[0].c_str());

LOG_INF("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
LOG_INF("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);
15 changes: 14 additions & 1 deletion examples/passkey/passkey.cpp
@@ -64,7 +64,20 @@ int main(int argc, char ** argv) {

llama_model_params model_params = common_model_params_to_llama(params);

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
llama_model * model;
if (params.model.paths.empty()) {
LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
return 1;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), model_params);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), model_params);
}

if (model == NULL) {
LOG_ERR("%s: unable to load model\n" , __func__);
4 changes: 2 additions & 2 deletions examples/speculative-simple/speculative-simple.cpp
@@ -24,7 +24,7 @@ int main(int argc, char ** argv) {

common_init();

if (params.speculative.model.path.empty()) {
if (params.speculative.model.paths.empty()) {
LOG_ERR("%s: --model-draft is required\n", __func__);
return 1;
}
@@ -67,7 +67,7 @@ int main(int argc, char ** argv) {
ctx_dft = llama_init_dft.context.get();

if (!common_speculative_are_compatible(ctx_tgt, ctx_dft)) {
LOG_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params.speculative.model.path.c_str(), params.model.path.c_str());
LOG_INF("the draft model '%s' is not compatible with the target model '%s'. tokens will be translated between the draft and target models.\n", params.speculative.model.paths[0].c_str(), params.model.paths[0].c_str());
}

// Tokenize the prompt
2 changes: 1 addition & 1 deletion examples/speculative/speculative.cpp
@@ -46,7 +46,7 @@ int main(int argc, char ** argv) {

common_init();

if (params.speculative.model.path.empty()) {
if (params.speculative.model.paths.empty()) {
LOG_ERR("%s: --model-draft is required\n", __func__);
return 1;
}
1 change: 1 addition & 0 deletions src/llama.cpp
@@ -258,6 +258,7 @@ struct llama_model * llama_model_load_from_splits(
return nullptr;
}
for (size_t i = 0; i < n_paths; ++i) {
LLAMA_LOG_INFO("%s: splits[%zu] = '%s'\n", __func__, i, paths[i]);
splits.push_back(paths[i]);
}
return llama_model_load_from_file_impl(splits.front(), splits, params);
8 changes: 4 additions & 4 deletions tests/test-arg-parser.cpp
@@ -77,7 +77,7 @@ int main(void) {

argv = {"binary_name", "-m", "model_file.gguf"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "model_file.gguf");
assert(params.model.paths[0] == "model_file.gguf");

argv = {"binary_name", "-t", "1234"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
@@ -89,7 +89,7 @@ int main(void) {

argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "abc.gguf");
assert(params.model.paths[0] == "abc.gguf");
assert(params.n_predict == 6789);
assert(params.n_batch == 9090);

@@ -112,7 +112,7 @@ int main(void) {
setenv("LLAMA_ARG_THREADS", "1010", true);
argv = {"binary_name"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "blah.gguf");
assert(params.model.paths[0] == "blah.gguf");
assert(params.cpuparams.n_threads == 1010);


@@ -122,7 +122,7 @@ int main(void) {
setenv("LLAMA_ARG_THREADS", "1010", true);
argv = {"binary_name", "-m", "overwritten.gguf"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "overwritten.gguf");
assert(params.model.paths[0] == "overwritten.gguf");
assert(params.cpuparams.n_threads == 1010);
#endif // _WIN32

18 changes: 16 additions & 2 deletions tests/test-thread-safety.cpp
@@ -66,9 +66,23 @@ int main(int argc, char ** argv) {
mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;;
}

llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
llama_model * model = NULL;
if (params.model.paths.empty()) {
LOG_ERR("%s: failed to load model 'model path not specified'\n", __func__);
return 1;
} else if (params.model.paths.size() == 1) {
model = llama_model_load_from_file(params.model.paths[0].c_str(), mparams);
} else {
std::vector<const char *> paths;
paths.reserve(params.model.paths.size());
for (const auto & path : params.model.paths) {
paths.push_back(path.c_str());
}
model = llama_model_load_from_splits(paths.data(), paths.size(), mparams);
}

if (model == NULL) {
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.paths[0].c_str());
return 1;
}
