|
13 | 13 | #include <thread> |
14 | 14 | #include <vector> |
15 | 15 |
|
| 16 | +#if defined(LLAMA_USE_CURL) |
| 17 | +#include <curl/curl.h> |
| 18 | +#include <curl/easy.h> |
| 19 | +#include <future> |
| 20 | +#endif |
| 21 | + |
16 | 22 | #include "json-schema-to-grammar.h" |
17 | 23 |
|
18 | 24 | using json = nlohmann::ordered_json; |
@@ -128,18 +134,105 @@ std::string common_arg::to_string() { |
128 | 134 | // utils |
129 | 135 | // |
130 | 136 |
|
#if defined(LLAMA_USE_CURL)
/**
 * Allow getting the HF file from the HF repo with tag (like ollama), for example:
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
 * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
 * Tag is optional, default to Q4_K_M if it exists
 * Return pair of <repo, file> (with "repo" already having tag removed)
 *
 * Throws std::invalid_argument on a malformed repo string and
 * std::runtime_error on any network/API failure.
 */
static std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) {
    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
    std::string tag = parts.size() > 1 ? parts[1] : "latest"; // "latest" means checking Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo
    std::string hf_repo = parts[0];
    if (string_split<std::string>(hf_repo, '/').size() != 2) {
        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<repo>[:tag]\n");
    }

    // fetch model info from Hugging Face Hub API
    json model_info;
    // RAII wrappers so the curl handle / header list are freed on every exit path
    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
    std::unique_ptr<struct curl_slist, decltype(&curl_slist_free_all)> http_headers(nullptr, &curl_slist_free_all);
    std::string res_str;
    std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
    typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
    auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
        static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
        return size * nmemb;
    };
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
#if defined(_WIN32)
    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
#endif
    // the Authorization header is only needed for gated/private repos
    if (!hf_token.empty()) {
        std::string auth_header = "Authorization: Bearer " + hf_token;
        http_headers.reset(curl_slist_append(http_headers.get(), auth_header.c_str()));
    }
    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
    // BUGFIX: these two headers were previously only sent together with the token,
    // which broke auto-detection for anonymous (token-less) requests
    http_headers.reset(curl_slist_append(http_headers.get(), "User-Agent: llama-cpp"));
    http_headers.reset(curl_slist_append(http_headers.get(), "Accept: application/json"));
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.get());

    CURLcode res = curl_easy_perform(curl.get());

    if (res != CURLE_OK) {
        throw std::runtime_error("error: cannot make GET request to Hugging Face Hub API");
    }

    long res_code;
    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
    if (res_code == 200) {
        model_info = json::parse(res_str);
    } else if (res_code == 401) { // BUGFIX: was "} if (...)", so the 200 path fell into the final else and always threw
        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
    } else {
        throw std::runtime_error(string_format("error: cannot get model info from Hugging Face Hub API, response code: %ld", res_code));
    }

    // check response
    if (!model_info.contains("ggufFile")) {
        throw std::runtime_error("error: model does not have ggufFile");
    }
    json & gguf_file = model_info.at("ggufFile");
    if (!gguf_file.contains("rfilename")) {
        throw std::runtime_error("error: ggufFile does not have rfilename");
    }

    return std::make_pair(hf_repo, gguf_file.at("rfilename"));
}
#else
// BUGFIX: must return the same type as the curl-enabled overload, otherwise the
// caller's auto_detected.first/.second does not compile in no-curl builds
static std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) {
    throw std::runtime_error("error: llama.cpp built without libcurl");
}
#endif
| 213 | + |
// Fill in default values for --model / --hf-file when only --hf-repo (and
// optionally a :tag) was given. May rewrite hf_repo (tag stripped) and hf_file
// (auto-detected from the Hub, or defaulted to `model`). Exits the process on
// auto-detection failure — callers rely on this for CLI error reporting.
// NOTE(review): definition continues past this view; comments cover the visible part only.
static void common_params_handle_model_default(
        std::string & model,          // in/out: local model path
        const std::string & model_url,
        std::string & hf_repo,        // in/out: <user>/<repo>[:tag]; tag removed on return
        std::string & hf_file,        // in/out: filled if empty
        const std::string & hf_token) {
    if (!hf_repo.empty()) {
        // short-hand to avoid specifying --hf-file -> default it to --model
        if (hf_file.empty()) {
            if (model.empty()) {
                // neither --hf-file nor --model given: ask the HF Hub which GGUF to use
                try {
                    auto auto_detected = common_get_hf_file(hf_repo, hf_token);
                    hf_repo = auto_detected.first;
                    hf_file = auto_detected.second;
                    printf("%s: using hf_file = %s\n", __func__, hf_file.c_str());
                } catch (std::exception & e) {
                    // fatal for a CLI tool: report and exit rather than propagate
                    fprintf(stderr, "%s: %s\n", __func__, e.what());
                    exit(1);
                }
            } else {
                hf_file = model;
            }
        } else if (model.empty()) {
            // this is to avoid different repo having same file name, or same file name in different subdirs
            std::string filename = hf_repo + "_" + hf_file;
@@ -290,8 +383,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context |
290 | 383 | } |
291 | 384 |
|
292 | 385 | // TODO: refactor model params in a common struct |
293 | | - common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file); |
294 | | - common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file); |
| 386 | + common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token); |
| 387 | + common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token); |
295 | 388 |
|
296 | 389 | if (params.escape) { |
297 | 390 | string_process_escapes(params.prompt); |
@@ -1583,21 +1676,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex |
1583 | 1676 | } |
1584 | 1677 | ).set_env("LLAMA_ARG_MODEL_URL")); |
1585 | 1678 | add_opt(common_arg( |
1586 | | - {"-hfr", "--hf-repo"}, "REPO", |
1587 | | - "Hugging Face model repository (default: unused)", |
| 1679 | + {"-hf", "-hfr", "--hf-repo"}, "<repo>/<user>[:quant]", |
| 1680 | + "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" |
| 1681 | + "example: unsloth/phi-4-GGUF:q4_k_m\n" |
| 1682 | + "(default: unused)", |
1588 | 1683 | [](common_params & params, const std::string & value) { |
1589 | 1684 | params.hf_repo = value; |
1590 | 1685 | } |
1591 | 1686 | ).set_env("LLAMA_ARG_HF_REPO")); |
    // Register -hff/--hf-file: explicit GGUF filename inside the HF repo;
    // unused when a :quant tag on --hf-repo already selects the file.
    add_opt(common_arg(
        {"-hff", "--hf-file"}, "FILE",
        "Hugging Face model file, unused if quant is already specified in --hf-repo (default: unused)",
        [](common_params & params, const std::string & value) {
            params.hf_file = value;
        }
    ).set_env("LLAMA_ARG_HF_FILE"));
1599 | 1694 | add_opt(common_arg( |
1600 | | - {"-hfrv", "--hf-repo-v"}, "REPO", |
| 1695 | + {"-hfv", "-hfrv", "--hf-repo-v"}, "<repo>/<user>[:quant]", |
1601 | 1696 | "Hugging Face model repository for the vocoder model (default: unused)", |
1602 | 1697 | [](common_params & params, const std::string & value) { |
1603 | 1698 | params.vocoder.hf_repo = value; |
|
0 commit comments