README.md: 6 additions & 0 deletions
@@ -265,6 +265,12 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt

You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`

Alternatively, models can be fetched from [ModelScope](https://www.modelscope.cn) with the CLI argument `-ms <user>/<model>[:quant]`, for example: `llama-cli -ms Qwen/QwQ-32B-GGUF`. You can find models on ModelScope compatible with `llama.cpp` through:

- [Trending](https://www.modelscope.cn/models?libraries=GGUF)

> You can change the ModelScope download endpoint by setting `MODELSCOPE_DOMAIN=xxx` (e.g. `MODELSCOPE_DOMAIN=www.modelscope.ai`).

After downloading a model, use the CLI tools to run it locally - see below.

`llama.cpp` requires the model to be stored in the [GGUF](https://github.com/ggml-org/ggml/blob/master/docs/gguf.md) file format. Models in other data formats can be converted to GGUF using the `convert_*.py` Python scripts in this repo.
common/arg.cpp: 76 additions & 1 deletion
@@ -34,6 +34,14 @@ common_arg & common_arg::set_env(const char * env) {
    return *this;
}

// mark ModelScope (rather than Hugging Face) as the model download source
// by setting an environment variable that the download logic checks later
static void set_ms_env() {
#ifdef _WIN32
    putenv("LLAMACPP_USE_MODELSCOPE=True");
#else
    setenv("LLAMACPP_USE_MODELSCOPE", "True", 1);
#endif
}

common_arg & common_arg::set_sparam() {
    is_sparam = true;
    return *this;
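This switch is read back in `common_params_handle_model_default` through `LLAMACPP_USE_MODELSCOPE_DEFINITION` (next hunk), whose definition is not shown in this diff. A minimal sketch of what that check could look like, assuming it simply inspects the environment variable set above (a hypothetical stand-in, not the PR's actual macro):

```cpp
#include <cstdlib>
#include <cstring>

// Hypothetical stand-in for LLAMACPP_USE_MODELSCOPE_DEFINITION:
// true once set_ms_env() has run (or the user exported the variable).
static bool llamacpp_use_modelscope() {
    const char * v = std::getenv("LLAMACPP_USE_MODELSCOPE");
    return v != nullptr && std::strcmp(v, "True") == 0;
}
```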
@@ -140,7 +148,12 @@ static void common_params_handle_model_default(
        // short-hand to avoid specifying --hf-file -> default it to --model
        if (hf_file.empty()) {
            if (model.empty()) {
                std::pair<std::string, std::string> auto_detected;
                if (LLAMACPP_USE_MODELSCOPE_DEFINITION) {
                    auto_detected = common_get_ms_file(hf_repo, hf_token);
                } else {
                    auto_detected = common_get_hf_file(hf_repo, hf_token);
                }
                if (auto_detected.first.empty() || auto_detected.second.empty()) {
                    exit(1); // built without CURL, error message already printed
                }
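`common_get_ms_file` is added elsewhere in this PR and mirrors `common_get_hf_file`: both resolve a `<user>/<model>[:quant]` repo string to a concrete GGUF file. Going by the `-ms` help text below (quant is optional, case-insensitive, defaults to Q4_K_M), a hedged sketch of the repo-string parsing step only, not the PR's implementation:

```cpp
#include <algorithm>
#include <cctype>
#include <string>
#include <utility>

// Split "<user>/<model>[:quant]" into {repo, quant}; quant defaults to Q4_K_M
// and is normalized to upper case since it is documented as case-insensitive.
static std::pair<std::string, std::string> split_repo_quant(const std::string & value) {
    const size_t pos = value.find(':');
    std::string repo  = value.substr(0, pos);
    std::string quant = (pos == std::string::npos) ? "Q4_K_M" : value.substr(pos + 1);
    std::transform(quant.begin(), quant.end(), quant.begin(),
                   [](unsigned char c) { return std::toupper(c); });
    return {repo, quant};
}
```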
@@ -206,6 +219,10 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
    std::string arg;
    const std::string arg_prefix = "--";
    common_params & params = ctx_arg.params;

    // all aliases that select a ModelScope repo; any of them flips the download source
    std::vector<std::string> ms_params = {
        "-ms", "-msr", "--ms-repo",
        "-msv", "-msrv", "--ms-repo-v"
    };

    std::unordered_map<std::string, common_arg *> arg_to_options;
    for (auto & opt : ctx_arg.options) {
@@ -219,6 +236,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
        std::string value;
        if (opt.get_value_from_env(value)) {
            try {
                // if a ModelScope option was supplied via its environment variable,
                // switch the download source before running the handler
                for (const auto & msp : ms_params) {
                    bool ms_arg = false;
                    for (const auto & _arg : opt.args) {
                        if (std::string(_arg) == msp) {
                            ms_arg = true;
                        }
                    }
                    if (ms_arg && !value.empty()) {
                        set_ms_env();
                        break;
                    }
                }
                if (opt.handler_void && (value == "1" || value == "true")) {
                    opt.handler_void(params);
                }
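The nested loops above are a plain membership test: if the option being handled from an environment variable is one of the `-ms*` repo aliases and its value is non-empty, the ModelScope switch is flipped before the handler runs, so `LLAMA_ARG_MS_REPO=Qwen/QwQ-32B-GGUF llama-cli` behaves like `llama-cli -ms Qwen/QwQ-32B-GGUF`. The same test written with `std::any_of`, as a sketch of an equivalent formulation (assuming `common_arg::args` holds C strings, as the loop above suggests), not a suggested change to the PR:

```cpp
#include <algorithm>
#include <string>
#include <vector>

// True when any alias of this option is one of the ModelScope repo flags.
static bool is_ms_option(const std::vector<std::string> & ms_params,
                         const std::vector<const char *> & args) {
    return std::any_of(args.begin(), args.end(), [&](const char * a) {
        return std::find(ms_params.begin(), ms_params.end(), std::string(a)) != ms_params.end();
    });
}
```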
@@ -266,6 +295,15 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
            // arg with single value
            check_arg(i);
            std::string val = argv[++i];

            // if this is one of the ModelScope repo aliases, switch the download source
            for (const auto & msp : ms_params) {
                if (msp == arg && !val.empty()) {
                    set_ms_env();
                    break;
                }
            }

            if (opt.handler_int) {
                opt.handler_int(params, std::stoi(val));
                continue;
@@ -1844,6 +1882,43 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.hf_token = value;
        }
    ).set_env("HF_TOKEN"));
    add_opt(common_arg(
        {"-ms", "-msr", "--ms-repo"}, "<user>/<model>[:quant]",
        "ModelScope model repository; quant is optional, case-insensitive, defaults to Q4_K_M, falling back to the first file in the repo if Q4_K_M doesn't exist.\n"
        "example: unsloth/phi-4-GGUF:q4_k_m\n"
        "(default: unused)",
        [](common_params & params, const std::string & value) {
            params.hf_repo = value;
        }
    ).set_env("LLAMA_ARG_MS_REPO"));
    add_opt(common_arg(
        {"-msf", "--ms-file"}, "FILE",
        "ModelScope model file. If specified, it will override the quant in --ms-repo (default: unused)",
        [](common_params & params, const std::string & value) {
            params.hf_file = value;
        }
    ).set_env("LLAMA_ARG_MS_FILE"));
    add_opt(common_arg(
        {"-msv", "-msrv", "--ms-repo-v"}, "<user>/<model>[:quant]",
        "ModelScope model repository for the vocoder model (default: unused)",
        [](common_params & params, const std::string & value) {
            params.vocoder.hf_repo = value;
        }
    ).set_env("LLAMA_ARG_MS_REPO_V"));
    add_opt(common_arg(
        {"-msfv", "--ms-file-v"}, "FILE",
        "ModelScope model file for the vocoder model (default: unused)",
        [](common_params & params, const std::string & value) {
            params.vocoder.hf_file = value;
        }
    ).set_env("LLAMA_ARG_MS_FILE_V"));
    add_opt(common_arg(
        {"-mst", "--ms-token"}, "TOKEN",
        "ModelScope access token (default: value from MS_TOKEN environment variable)",
        [](common_params & params, const std::string & value) {
            params.hf_token = value;
        }
    ).set_env("MS_TOKEN"));
    add_opt(common_arg(
        {"--context-file"}, "FNAME",
        "file to load context from (repeat to specify multiple files)",
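Taken together, the `-ms*` options reuse the existing `hf_repo`/`hf_file`/`hf_token` fields, so downstream code only needs to pick a different endpoint when the ModelScope switch is set. A hypothetical sketch of that endpoint choice, combining the `LLAMACPP_USE_MODELSCOPE` variable from this PR with the `MODELSCOPE_DOMAIN` override mentioned in the README hunk (the helper name and default URLs are assumptions):

```cpp
#include <cstdlib>
#include <string>

// Hypothetical helper: choose the download endpoint. Defaults to Hugging Face;
// switches to ModelScope when LLAMACPP_USE_MODELSCOPE is set, honoring the
// MODELSCOPE_DOMAIN override documented in the README.
static std::string model_endpoint() {
    if (std::getenv("LLAMACPP_USE_MODELSCOPE") == nullptr) {
        return "https://huggingface.co/";
    }
    const char * domain = std::getenv("MODELSCOPE_DOMAIN");
    return std::string("https://") + (domain ? domain : "www.modelscope.cn") + "/";
}
```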