Skip to content

Commit 242135e

Browse files
committed
various fixes
1 parent ef089ca commit 242135e

File tree

3 files changed

+45
-32
lines changed

3 files changed

+45
-32
lines changed

common/arg.cpp

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,6 @@
1313
#include <thread>
1414
#include <vector>
1515

16-
#if defined(LLAMA_USE_CURL)
17-
#include <curl/curl.h>
18-
#include <curl/easy.h>
19-
#include <future>
20-
#endif
21-
2216
#include "json-schema-to-grammar.h"
2317

2418
using json = nlohmann::ordered_json;
@@ -140,21 +134,21 @@ std::string common_arg::to_string() {
140134
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
141135
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
142136
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
143-
* Tag is optional, default to Q4_K_M if it exists
137+
* Tag is optional and defaults to "latest" (meaning it checks for Q4_K_M first, then Q4, and if neither is found, returns the first GGUF file in the repo)
144138
* Return pair of <repo, file> (with "repo" already having tag removed)
145139
*/
146140
static std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) {
147141
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
148-
std::string tag = parts.size() > 1 ? parts[1] : "latest"; // "latest" means checking Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo
142+
std::string tag = parts.size() > 1 ? parts.back() : "latest";
149143
std::string hf_repo = parts[0];
150144
if (string_split<std::string>(hf_repo, '/').size() != 2) {
151-
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<repo>[:tag]\n");
145+
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
152146
}
153147

154148
// fetch model info from Hugging Face Hub API
155149
json model_info;
156-
std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
157-
std::unique_ptr<struct curl_slist, decltype(&curl_slist_free_all)> http_headers(nullptr, &curl_slist_free_all);
150+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
151+
curl_slist_ptr http_headers;
158152
std::string res_str;
159153
std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
160154
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
@@ -171,26 +165,27 @@ static std::pair<std::string, std::string> common_get_hf_file(const std::string
171165
#endif
172166
if (!hf_token.empty()) {
173167
std::string auth_header = "Authorization: Bearer " + hf_token;
174-
http_headers.reset(curl_slist_append(http_headers.get(), auth_header.c_str()));
175-
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
176-
http_headers.reset(curl_slist_append(http_headers.get(), "User-Agent: llama-cpp"));
177-
http_headers.reset(curl_slist_append(http_headers.get(), "Accept: application/json"));
178-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.get());
168+
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
179169
}
170+
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
171+
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
172+
http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
173+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
174+
180175
CURLcode res = curl_easy_perform(curl.get());
181176

182177
if (res != CURLE_OK) {
183-
throw std::runtime_error("error: cannot make GET request to Hugging Face Hub API");
178+
throw std::runtime_error("error: cannot make GET request to HF API");
184179
}
185180

186181
long res_code;
187182
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
188183
if (res_code == 200) {
189184
model_info = json::parse(res_str);
190-
} if (res_code == 401) {
185+
} else if (res_code == 401) {
191186
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
192187
} else {
193-
throw std::runtime_error(string_format("error: cannot get model info from Hugging Face Hub API, response code: %ld", res_code));
188+
throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
194189
}
195190

196191
// check response
@@ -202,7 +197,6 @@ static std::pair<std::string, std::string> common_get_hf_file(const std::string
202197
throw std::runtime_error("error: ggufFile does not have rfilename");
203198
}
204199

205-
// TODO handle error
206200
return std::make_pair(hf_repo, gguf_file.at("rfilename"));
207201
}
208202
#else
@@ -1676,7 +1670,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
16761670
}
16771671
).set_env("LLAMA_ARG_MODEL_URL"));
16781672
add_opt(common_arg(
1679-
{"-hf", "-hfr", "--hf-repo"}, "<repo>/<user>[:quant]",
1673+
{"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
16801674
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
16811675
"example: unsloth/phi-4-GGUF:q4_k_m\n"
16821676
"(default: unused)",
@@ -1686,13 +1680,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
16861680
).set_env("LLAMA_ARG_HF_REPO"));
16871681
add_opt(common_arg(
16881682
{"-hff", "--hf-file"}, "FILE",
1689-
"Hugging Face model file, unused if quant is already specified in --hf-repo (default: unused)",
1683+
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
16901684
[](common_params & params, const std::string & value) {
16911685
params.hf_file = value;
16921686
}
16931687
).set_env("LLAMA_ARG_HF_FILE"));
16941688
add_opt(common_arg(
1695-
{"-hfv", "-hfrv", "--hf-repo-v"}, "<repo>/<user>[:quant]",
1689+
{"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
16961690
"Hugging Face model repository for the vocoder model (default: unused)",
16971691
[](common_params & params, const std::string & value) {
16981692
params.vocoder.hf_repo = value;

common/common.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,6 @@
5252
#include <sys/stat.h>
5353
#include <unistd.h>
5454
#endif
55-
#if defined(LLAMA_USE_CURL)
56-
#include <curl/curl.h>
57-
#include <curl/easy.h>
58-
#include <future>
59-
#endif
6055

6156
#if defined(_MSC_VER)
6257
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -1126,8 +1121,8 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
11261121

11271122
static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
11281123
// Initialize libcurl
1129-
std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
1130-
std::unique_ptr<struct curl_slist, decltype(&curl_slist_free_all)> http_headers(nullptr, &curl_slist_free_all);
1124+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
1125+
curl_slist_ptr http_headers;
11311126
if (!curl) {
11321127
LOG_ERR("%s: error initializing libcurl\n", __func__);
11331128
return false;
@@ -1142,8 +1137,8 @@ static bool common_download_file(const std::string & url, const std::string & pa
11421137
// Check if hf-token or bearer-token was specified
11431138
if (!hf_token.empty()) {
11441139
std::string auth_header = "Authorization: Bearer " + hf_token;
1145-
http_headers.reset(curl_slist_append(http_headers.get(), auth_header.c_str()));
1146-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.get());
1140+
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
1141+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
11471142
}
11481143

11491144
#if defined(_WIN32)

common/common.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88
#include <vector>
99
#include <sstream>
1010

11+
#if defined(LLAMA_USE_CURL)
12+
#include <curl/curl.h>
13+
#include <curl/easy.h>
14+
#include <future>
15+
#endif
16+
1117
#ifdef _WIN32
1218
#define DIRECTORY_SEPARATOR '\\'
1319
#else
@@ -651,4 +657,22 @@ const char * const LLM_KV_SPLIT_NO = "split.no";
651657
const char * const LLM_KV_SPLIT_COUNT = "split.count";
652658
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
653659

660+
#if defined(LLAMA_USE_CURL)
661+
//
662+
// CURL utils
663+
//
664+
665+
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
666+
667+
// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
668+
struct curl_slist_ptr {
669+
struct curl_slist * ptr = nullptr;
670+
~curl_slist_ptr() {
671+
if (ptr) {
672+
curl_slist_free_all(ptr);
673+
}
674+
}
675+
};
676+
#endif
677+
654678
}

0 commit comments

Comments
 (0)