Skip to content

Commit 8030316

Browse files
committed
common : support tag-based hf_repo like on ollama
1 parent c05e8c9 commit 8030316

File tree

3 files changed

+114
-15
lines changed

3 files changed

+114
-15
lines changed

common/arg.cpp

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
#include <thread>
1414
#include <vector>
1515

16+
#if defined(LLAMA_USE_CURL)
17+
#include <curl/curl.h>
18+
#include <curl/easy.h>
19+
#include <future>
20+
#endif
21+
1622
#include "json-schema-to-grammar.h"
1723

1824
using json = nlohmann::ordered_json;
@@ -128,18 +134,105 @@ std::string common_arg::to_string() {
128134
// utils
129135
//
130136

137+
#if defined(LLAMA_USE_CURL)
/**
 * Allow getting the HF file from the HF repo with tag (like ollama), for example:
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
 * Tag is optional, default to "latest" (server-side this resolves to Q4_K_M if present,
 * otherwise the first GGUF file in the repo).
 * Return pair of <repo, file> (with "repo" already having tag removed)
 * Throws std::invalid_argument on a malformed repo string and std::runtime_error on
 * any network / HTTP / response-shape failure.
 */
static std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) {
    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
    std::string tag = parts.size() > 1 ? parts[1] : "latest"; // "latest" means checking Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo
    std::string hf_repo = parts[0];
    if (string_split<std::string>(hf_repo, '/').size() != 2) {
        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<repo>[:tag]\n");
    }

    // fetch model info from Hugging Face Hub API
    json model_info;
    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
    std::unique_ptr<struct curl_slist, decltype(&curl_slist_free_all)> http_headers(nullptr, &curl_slist_free_all);
    std::string res_str;
    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
    typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
    auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
        static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
        return size * nmemb;
    };
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
#if defined(_WIN32)
    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
#endif
    if (!hf_token.empty()) {
        std::string auth_header = "Authorization: Bearer " + hf_token;
        http_headers.reset(curl_slist_append(http_headers.get(), auth_header.c_str()));
    }
    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response.
    // These headers must be sent even without a token, otherwise anonymous requests to public
    // repos never receive "ggufFile" (previously they were only set inside the token branch).
    http_headers.reset(curl_slist_append(http_headers.get(), "User-Agent: llama-cpp"));
    http_headers.reset(curl_slist_append(http_headers.get(), "Accept: application/json"));
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.get());

    CURLcode res = curl_easy_perform(curl.get());

    if (res != CURLE_OK) {
        throw std::runtime_error("error: cannot make GET request to Hugging Face Hub API");
    }

    long res_code;
    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
    if (res_code == 200) {
        model_info = json::parse(res_str);
    } else if (res_code == 401) { // fixed: was "} if (...)", which made every 200 response also hit the final else and throw
        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
    } else {
        throw std::runtime_error(string_format("error: cannot get model info from Hugging Face Hub API, response code: %ld", res_code));
    }

    // check response shape before dereferencing fields
    if (!model_info.contains("ggufFile")) {
        throw std::runtime_error("error: model does not have ggufFile");
    }
    json & gguf_file = model_info.at("ggufFile");
    if (!gguf_file.contains("rfilename")) {
        throw std::runtime_error("error: ggufFile does not have rfilename");
    }

    return std::make_pair(hf_repo, gguf_file.at("rfilename"));
}
208+
#else
209+
// Stub used when llama.cpp is built without libcurl: always fails.
// Return type must match the LLAMA_USE_CURL variant (std::pair<repo, file>),
// otherwise callers that use .first/.second do not compile in non-curl builds
// (the previous std::string return type broke exactly that).
static std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) {
    throw std::runtime_error("error: llama.cpp built without libcurl");
}
212+
#endif
213+
131214
static void common_params_handle_model_default(
132215
std::string & model,
133-
std::string & model_url,
216+
const std::string & model_url,
134217
std::string & hf_repo,
135-
std::string & hf_file) {
218+
std::string & hf_file,
219+
const std::string & hf_token) {
136220
if (!hf_repo.empty()) {
137221
// short-hand to avoid specifying --hf-file -> default it to --model
138222
if (hf_file.empty()) {
139223
if (model.empty()) {
140-
throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
224+
try {
225+
auto auto_detected = common_get_hf_file(hf_repo, hf_token);
226+
hf_repo = auto_detected.first;
227+
hf_file = auto_detected.second;
228+
printf("%s: using hf_file = %s\n", __func__, hf_file.c_str());
229+
} catch (std::exception & e) {
230+
fprintf(stderr, "%s: %s\n", __func__, e.what());
231+
exit(1);
232+
}
233+
} else {
234+
hf_file = model;
141235
}
142-
hf_file = model;
143236
} else if (model.empty()) {
144237
// this is to avoid different repo having same file name, or same file name in different subdirs
145238
std::string filename = hf_repo + "_" + hf_file;
@@ -290,8 +383,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
290383
}
291384

292385
// TODO: refactor model params in a common struct
293-
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file);
294-
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file);
386+
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
387+
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
295388

296389
if (params.escape) {
297390
string_process_escapes(params.prompt);
@@ -1583,21 +1676,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
15831676
}
15841677
).set_env("LLAMA_ARG_MODEL_URL"));
15851678
add_opt(common_arg(
1586-
{"-hfr", "--hf-repo"}, "REPO",
1587-
"Hugging Face model repository (default: unused)",
1679+
{"-hf", "-hfr", "--hf-repo"}, "<repo>/<user>[:quant]",
1680+
"Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
1681+
"example: unsloth/phi-4-GGUF:q4_k_m\n"
1682+
"(default: unused)",
15881683
[](common_params & params, const std::string & value) {
15891684
params.hf_repo = value;
15901685
}
15911686
).set_env("LLAMA_ARG_HF_REPO"));
15921687
add_opt(common_arg(
15931688
{"-hff", "--hf-file"}, "FILE",
1594-
"Hugging Face model file (default: unused)",
1689+
"Hugging Face model file, unused if quant is already specified in --hf-repo (default: unused)",
15951690
[](common_params & params, const std::string & value) {
15961691
params.hf_file = value;
15971692
}
15981693
).set_env("LLAMA_ARG_HF_FILE"));
15991694
add_opt(common_arg(
1600-
{"-hfrv", "--hf-repo-v"}, "REPO",
1695+
{"-hfv", "-hfrv", "--hf-repo-v"}, "<repo>/<user>[:quant]",
16011696
"Hugging Face model repository for the vocoder model (default: unused)",
16021697
[](common_params & params, const std::string & value) {
16031698
params.vocoder.hf_repo = value;

common/common.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
11271127
static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
11281128
// Initialize libcurl
11291129
std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
1130+
std::unique_ptr<struct curl_slist, decltype(&curl_slist_free_all)> http_headers(nullptr, &curl_slist_free_all);
11301131
if (!curl) {
11311132
LOG_ERR("%s: error initializing libcurl\n", __func__);
11321133
return false;
@@ -1140,11 +1141,9 @@ static bool common_download_file(const std::string & url, const std::string & pa
11401141

11411142
// Check if hf-token or bearer-token was specified
11421143
if (!hf_token.empty()) {
1143-
std::string auth_header = "Authorization: Bearer ";
1144-
auth_header += hf_token.c_str();
1145-
struct curl_slist *http_headers = NULL;
1146-
http_headers = curl_slist_append(http_headers, auth_header.c_str());
1147-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers);
1144+
std::string auth_header = "Authorization: Bearer " + hf_token;
1145+
http_headers.reset(curl_slist_append(http_headers.get(), auth_header.c_str()));
1146+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.get());
11481147
}
11491148

11501149
#if defined(_WIN32)

common/common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,11 @@ static bool string_starts_with(const std::string & str,
454454
return str.rfind(prefix, 0) == 0;
455455
}
456456

457+
// While we wait for C++20's std::string::ends_with...
// True iff `str` ends with `suffix` (every string ends with the empty suffix).
static bool string_ends_with(const std::string & str,
                             const std::string & suffix) {
    const size_t len  = str.size();
    const size_t slen = suffix.size();
    if (slen > len) {
        return false;
    }
    return str.compare(len - slen, slen, suffix) == 0;
}
461+
457462
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
458463
void string_process_escapes(std::string & input);
459464

0 commit comments

Comments
 (0)