7373#include < sys/syslimits.h>
7474#endif
7575#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
76+
77+ //
78+ // CURL utils
79+ //
80+
81+ using curl_ptr = std::unique_ptr<CURL, decltype (&curl_easy_cleanup)>;
82+
83+ // cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
84+ struct curl_slist_ptr {
85+ struct curl_slist * ptr = nullptr ;
86+ ~curl_slist_ptr () {
87+ if (ptr) {
88+ curl_slist_free_all (ptr);
89+ }
90+ }
91+ };
7692#endif // LLAMA_USE_CURL
7793
7894using json = nlohmann::ordered_json;
@@ -1130,7 +1146,8 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
11301146
11311147static bool common_download_file (const std::string & url, const std::string & path, const std::string & hf_token) {
11321148 // Initialize libcurl
1133- std::unique_ptr<CURL, decltype (&curl_easy_cleanup)> curl (curl_easy_init (), &curl_easy_cleanup);
1149+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
1150+ curl_slist_ptr http_headers;
11341151 if (!curl) {
11351152 LOG_ERR (" %s: error initializing libcurl\n " , __func__);
11361153 return false ;
@@ -1144,11 +1161,9 @@ static bool common_download_file(const std::string & url, const std::string & pa
11441161
11451162 // Check if hf-token or bearer-token was specified
11461163 if (!hf_token.empty ()) {
1147- std::string auth_header = " Authorization: Bearer " ;
1148- auth_header += hf_token.c_str ();
1149- struct curl_slist *http_headers = NULL ;
1150- http_headers = curl_slist_append (http_headers, auth_header.c_str ());
1151- curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers);
1164+ std::string auth_header = " Authorization: Bearer " + hf_token;
1165+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
1166+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
11521167 }
11531168
11541169#if defined(_WIN32)
@@ -1444,6 +1459,80 @@ struct llama_model * common_load_model_from_hf(
14441459 return common_load_model_from_url (model_url, local_path, hf_token, params);
14451460}
14461461
1462+ /* *
1463+ * Allow getting the HF file from the HF repo with tag (like ollama), for example:
1464+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
1465+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
1466+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
1467+ * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
1468+ *
1469+ * Return pair of <repo, file> (with "repo" already having tag removed)
1470+ *
1471+ * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
1472+ */
1473+ std::pair<std::string, std::string> common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & hf_token) {
1474+ auto parts = string_split<std::string>(hf_repo_with_tag, ' :' );
1475+ std::string tag = parts.size () > 1 ? parts.back () : " latest" ;
1476+ std::string hf_repo = parts[0 ];
1477+ if (string_split<std::string>(hf_repo, ' /' ).size () != 2 ) {
1478+ throw std::invalid_argument (" error: invalid HF repo format, expected <user>/<model>[:quant]\n " );
1479+ }
1480+
1481+ // fetch model info from Hugging Face Hub API
1482+ json model_info;
1483+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
1484+ curl_slist_ptr http_headers;
1485+ std::string res_str;
1486+ std::string url = " https://huggingface.co/v2/" + hf_repo + " /manifests/" + tag;
1487+ curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
1488+ curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L );
1489+ typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
1490+ auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
1491+ static_cast <std::string *>(data)->append ((char * ) ptr, size * nmemb);
1492+ return size * nmemb;
1493+ };
1494+ curl_easy_setopt (curl.get (), CURLOPT_WRITEFUNCTION, static_cast <CURLOPT_WRITEFUNCTION_PTR>(write_callback));
1495+ curl_easy_setopt (curl.get (), CURLOPT_WRITEDATA, &res_str);
1496+ #if defined(_WIN32)
1497+ curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
1498+ #endif
1499+ if (!hf_token.empty ()) {
1500+ std::string auth_header = " Authorization: Bearer " + hf_token;
1501+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
1502+ }
1503+ // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
1504+ http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
1505+ http_headers.ptr = curl_slist_append (http_headers.ptr , " Accept: application/json" );
1506+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
1507+
1508+ CURLcode res = curl_easy_perform (curl.get ());
1509+
1510+ if (res != CURLE_OK) {
1511+ throw std::runtime_error (" error: cannot make GET request to HF API" );
1512+ }
1513+
1514+ long res_code;
1515+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &res_code);
1516+ if (res_code == 200 ) {
1517+ model_info = json::parse (res_str);
1518+ } else if (res_code == 401 ) {
1519+ throw std::runtime_error (" error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token" );
1520+ } else {
1521+ throw std::runtime_error (string_format (" error from HF API, response code: %ld, data: %s" , res_code, res_str.c_str ()));
1522+ }
1523+
1524+ // check response
1525+ if (!model_info.contains (" ggufFile" )) {
1526+ throw std::runtime_error (" error: model does not have ggufFile" );
1527+ }
1528+ json & gguf_file = model_info.at (" ggufFile" );
1529+ if (!gguf_file.contains (" rfilename" )) {
1530+ throw std::runtime_error (" error: ggufFile does not have rfilename" );
1531+ }
1532+
1533+ return std::make_pair (hf_repo, gguf_file.at (" rfilename" ));
1534+ }
1535+
14471536#else
14481537
14491538struct llama_model * common_load_model_from_url (
@@ -1465,6 +1554,11 @@ struct llama_model * common_load_model_from_hf(
14651554 return nullptr ;
14661555}
14671556
1557+ std::pair<std::string, std::string> common_get_hf_file (const std::string &, const std::string &) {
1558+ LOG_WRN (" %s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n " , __func__);
1559+ return std::make_pair (" " , " " );
1560+ }
1561+
14681562#endif // LLAMA_USE_CURL
14691563
14701564//
0 commit comments