
Commit a7c4895

Merge branch 'ggml-org:master' into master

2 parents 8c57157 + 578754b

6 files changed: +36 additions, -15 deletions

README.md

Lines changed: 3 additions & 1 deletion

@@ -260,7 +260,9 @@ The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](htt
 - [Trending](https://huggingface.co/models?library=gguf&sort=trending)
 - [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)

-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible model from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to another endpoint with the `MODEL_ENDPOINT` environment variable. For example, to fetch model checkpoints from ModelScope or another model-sharing community, set `MODEL_ENDPOINT=https://www.modelscope.cn/`.

 After downloading a model, use the CLI tools to run it locally - see below.
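
The resulting URL layout is easiest to see standalone. The sketch below is illustrative only, not the actual `llama.cpp` code: `resolve_endpoint` and the repo/file names are made up to show how the `MODEL_ENDPOINT` override composes with a `-hf <user>/<model>` argument into the final download URL.

```cpp
// Minimal sketch (not the llama.cpp implementation) of endpoint resolution
// plus URL composition; names and defaults here are illustrative only.
#include <cstdlib>
#include <iostream>
#include <string>

static std::string resolve_endpoint() {
    const char * env = std::getenv("MODEL_ENDPOINT");  // e.g. https://www.modelscope.cn/
    std::string endpoint = env ? env : "https://huggingface.co/";
    if (!endpoint.empty() && endpoint.back() != '/') {
        endpoint += '/';  // normalize so concatenation below stays valid
    }
    return endpoint;
}

int main() {
    const std::string hf_repo = "user/model";          // placeholder from `-hf user/model`
    const std::string hf_file = "model-Q4_K_M.gguf";   // placeholder file name
    // Final URL follows the <endpoint><repo>/resolve/main/<file> layout used in the diff below.
    std::cout << resolve_endpoint() + hf_repo + "/resolve/main/" + hf_file << "\n";
}
```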

common/arg.cpp

Lines changed: 8 additions & 9 deletions

@@ -228,12 +228,13 @@ static bool common_download_file_single(const std::string & url, const std::stri
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);

+    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
     // Check if hf-token or bearer-token was specified
     if (!bearer_token.empty()) {
         std::string auth_header = "Authorization: Bearer " + bearer_token;
         http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
-        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
     }
+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);

 #if defined(_WIN32)
     // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of

@@ -544,7 +545,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
     curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
     curl_slist_ptr http_headers;
     std::string res_str;
-    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
+
+    std::string model_endpoint = get_model_endpoint();
+
+    std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
     typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);

@@ -659,13 +663,8 @@ static void common_params_handle_model(
         }
     }

-    std::string hf_endpoint = "https://huggingface.co/";
-    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
-    if (hf_endpoint_env) {
-        hf_endpoint = hf_endpoint_env;
-        if (hf_endpoint.back() != '/') hf_endpoint += '/';
-    }
-    model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
+    std::string model_endpoint = get_model_endpoint();
+    model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
     // make sure model path is present (for caching purposes)
     if (model.path.empty()) {
         // this is to avoid different repo having same file name, or same file name in different subdirs
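
The fix in the first hunk is easy to miss: `CURLOPT_HTTPHEADER` used to be set only inside the bearer-token branch, so the new `User-Agent` header would never reach the server on anonymous downloads. A minimal sketch of the corrected pattern (standalone, error handling omitted; not the llama.cpp code itself):

```cpp
// Build one curl_slist of headers and register it exactly once, so the
// User-Agent header is sent even when no bearer token is configured.
#include <curl/curl.h>
#include <string>

struct curl_slist * configure_headers(CURL * curl, const std::string & bearer_token) {
    struct curl_slist * headers = curl_slist_append(nullptr, "User-Agent: llama-cpp");
    if (!bearer_token.empty()) {
        const std::string auth = "Authorization: Bearer " + bearer_token;
        headers = curl_slist_append(headers, auth.c_str());
    }
    // Registering the list unconditionally is the actual fix: the old code
    // set CURLOPT_HTTPHEADER only inside the bearer-token branch above.
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    return headers;  // caller frees with curl_slist_free_all() after the transfer
}
```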

common/common.cpp

Lines changed: 13 additions & 0 deletions

@@ -1027,6 +1027,19 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }

+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the environment variable "HF_ENDPOINT" for backward compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    if (endpoint_env) {
+        model_endpoint = endpoint_env;
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
     llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
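
The precedence encoded here is `MODEL_ENDPOINT` first, then the legacy `HF_ENDPOINT`, then the Hugging Face default, with a trailing slash appended when missing. A hypothetical usage sketch of the new helper (the mirror URL is invented for the example; `setenv`/`unsetenv` are POSIX):

```cpp
// Exercises get_model_endpoint() precedence and trailing-slash normalization.
#include <cassert>
#include <cstdlib>
#include <string>

std::string get_model_endpoint();  // declared in common/common.h

int main() {
    unsetenv("MODEL_ENDPOINT");
    unsetenv("HF_ENDPOINT");
    assert(get_model_endpoint() == "https://huggingface.co/");  // default

    setenv("HF_ENDPOINT", "https://hf-mirror.example", 1);      // no trailing slash
    assert(get_model_endpoint() == "https://hf-mirror.example/");

    setenv("MODEL_ENDPOINT", "https://www.modelscope.cn/", 1);  // wins over HF_ENDPOINT
    assert(get_model_endpoint() == "https://www.modelscope.cn/");
    return 0;
}
```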

common/common.h

Lines changed: 2 additions & 0 deletions

@@ -543,6 +543,8 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p
 // clear LoRA adapters from context, then apply new list of adapters
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);

+std::string get_model_endpoint();
+
 //
 // Batch utils
 //

examples/run/run.cpp

Lines changed: 4 additions & 2 deletions

@@ -697,8 +697,10 @@ class LlamaData {
         std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
         std::string url;

+        std::string model_endpoint = get_model_endpoint();
+
         if (pos == std::string::npos) {
-            auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
+            auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
             hfr = model_name;

             nlohmann::json manifest;

@@ -713,7 +715,7 @@ class LlamaData {
             hff = model.substr(pos + 1);
         }

-        url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+        url = model_endpoint + hfr + "/resolve/main/" + hff;

         return download(url, bn, true, headers);
     }
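
Both URL shapes `llama-run` builds are now derived from the same endpoint: `<endpoint>v2/<repo>/manifests/<tag>` for manifest lookups and `<endpoint><repo>/resolve/main/<file>` for direct file fetches. A rough sketch of the split-and-compose flow (illustrative only; `split_model_and_tag`, the default tag, and the file name are assumptions, not the real `extract_model_and_tag`):

```cpp
// Splits "<user>/<model>[:tag]" and prints the two URL shapes from the diff above.
#include <iostream>
#include <string>
#include <utility>

static std::pair<std::string, std::string> split_model_and_tag(const std::string & model) {
    const size_t colon = model.find(':');
    if (colon == std::string::npos) {
        return { model, "latest" };  // default tag assumed for this sketch
    }
    return { model.substr(0, colon), model.substr(colon + 1) };
}

int main() {
    const std::string endpoint = "https://huggingface.co/";  // or get_model_endpoint()
    auto [repo, tag] = split_model_and_tag("user/model:Q4_K_M");
    std::cout << endpoint + "v2/" + repo + "/manifests/" + tag << "\n";  // manifest lookup
    std::cout << endpoint + repo + "/resolve/main/model.gguf" << "\n";   // file fetch (name assumed)
}
```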

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 6 additions & 3 deletions

@@ -3653,7 +3653,8 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_

 #ifdef GGML_SYCL_GRAPH
     if (!g_ggml_sycl_disable_graph) {
-        if (!sycl_ctx->exec_graph && !dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_graph)) {
+        const bool graph_support = dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_limited_graph);
+        if (!graph_support) {
             GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);
             ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
             return GGML_STATUS_SUCCESS;

@@ -3664,8 +3665,10 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
         ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
         model_sycl_graph.end_recording();

-        if (!sycl_ctx->exec_graph) {
-            auto exec_graph = model_sycl_graph.finalize({sycl_ex::property::graph::updatable{}});
+        const bool graph_update_support = dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_graph);
+        if (!sycl_ctx->exec_graph || !graph_update_support) {
+            auto exec_graph = graph_update_support ? model_sycl_graph.finalize(sycl_ex::property::graph::updatable{})
+                                                   : model_sycl_graph.finalize();
             sycl_ctx->exec_graph = std::make_unique<
                 sycl_ex::command_graph<sycl_ex::graph_state::executable>>(exec_graph);
         } else {
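
This change splits graph capability into two device aspects: `ext_oneapi_limited_graph` (the device can record and replay a SYCL graph) gates graph use at all, while the stronger `ext_oneapi_graph` (finalized executable graphs can be updated in place) gates finalizing with the `updatable` property; without it, a fresh non-updatable graph is finalized each time. A minimal probe of the two aspects, as a sketch assuming a DPC++ toolchain that implements the `sycl_ext_oneapi_graph` extension:

```cpp
// Queries the two graph-related device aspects used in the diff above.
// Compile with a SYCL compiler (e.g. icpx -fsycl); output depends on hardware.
#include <sycl/sycl.hpp>
#include <iostream>

int main() {
    sycl::device dev{sycl::default_selector_v};
    const bool graph_support        = dev.has(sycl::aspect::ext_oneapi_limited_graph);
    const bool graph_update_support = dev.has(sycl::aspect::ext_oneapi_graph);
    std::cout << "record/replay graphs: " << (graph_support ? "yes" : "no") << "\n";
    std::cout << "updatable graphs:     " << (graph_update_support ? "yes" : "no") << "\n";
    // In the diff above, llama.cpp falls back to eager execution when
    // graph_support is false, and re-finalizes a fresh non-updatable graph
    // on every call when graph_update_support is false.
}
```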
