@@ -907,7 +907,11 @@ struct common_init_result common_init_from_params(common_params & params) {
 llama_model * model = nullptr;

 if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-    model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
+    if (LLAMACPP_USE_MODELSCOPE_DEFINITION) {
+        model = common_load_model_from_ms(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
+    } else {
+        model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
+    }
 } else if (!params.model_url.empty()) {
     model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
 } else {
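Note: the hub choice above is fixed at compile time by the LLAMACPP_USE_MODELSCOPE_DEFINITION macro. A minimal sketch of how such a flag might be wired up, assuming a hypothetical build option; the names and defaults below are illustrative and not part of this commit:

// hypothetical build wiring, e.g. from CMake:
//   add_compile_definitions(LLAMACPP_USE_MODELSCOPE_DEFINITION=1)
//   add_compile_definitions(MODELSCOPE_DOMAIN_DEFINITION=std::string("modelscope.cn"))
#ifndef LLAMACPP_USE_MODELSCOPE_DEFINITION
#define LLAMACPP_USE_MODELSCOPE_DEFINITION 0                       // default: Hugging Face path
#endif
#ifndef MODELSCOPE_DOMAIN_DEFINITION
#define MODELSCOPE_DOMAIN_DEFINITION std::string("modelscope.cn")  // assumed default domain
#endif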
@@ -1207,6 +1211,12 @@ static bool common_download_file(const std::string & url, const std::string & pa
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);

+    std::vector<std::string> _headers = {"User-Agent: llama-cpp"};
+    for (const auto & header : _headers) {
+        http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
+    }
+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
+
     // Check if hf-token or bearer-token was specified
     if (!hf_token.empty()) {
         std::string auth_header = "Authorization: Bearer " + hf_token;
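With this change every download carries a User-Agent: llama-cpp header in addition to any Authorization header set below. A self-contained sketch of the same libcurl header handling (the URL is a placeholder and error checking is trimmed):

#include <curl/curl.h>
#include <string>
#include <vector>

int main() {
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL * curl = curl_easy_init();
    // build the header list the same way the hunk above does
    curl_slist * headers = nullptr;
    std::vector<std::string> hdrs = {"User-Agent: llama-cpp"};
    for (const auto & h : hdrs) {
        headers = curl_slist_append(headers, h.c_str());
    }
    curl_easy_setopt(curl, CURLOPT_URL, "https://example.com/");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_perform(curl);
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return 0;
}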
@@ -1265,6 +1275,7 @@ static bool common_download_file(const std::string & url, const std::string & pa
     };

     common_load_model_from_url_headers headers;
+    bool should_download = false;

     {
         typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
@@ -1293,32 +1304,35 @@ static bool common_download_file(const std::string & url, const std::string & pa
         curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
         curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
         curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+        if (!LLAMACPP_USE_MODELSCOPE_DEFINITION) {
+            bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
+            if (!was_perform_successful) {
+                return false;
+            }

-        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
-        if (!was_perform_successful) {
-            return false;
-        }
-
-        long http_code = 0;
-        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-        if (http_code != 200) {
-            // HEAD not supported, we don't know if the file has changed
-            // force trigger downloading
-            force_download = true;
-            LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+            long http_code = 0;
+            curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+            if (http_code != 200) {
+                // HEAD not supported, we don't know if the file has changed
+                // force trigger downloading
+                force_download = true;
+                LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+            }
+            should_download = !file_exists || force_download;
+            if (!should_download) {
+                if (!etag.empty() && etag != headers.etag) {
+                    LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+                    should_download = true;
+                } else if (!last_modified.empty() && last_modified != headers.last_modified) {
+                    LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+                    should_download = true;
+                }
+            }
+        } else {
+            should_download = !file_exists;
         }
     }

-    bool should_download = !file_exists || force_download;
-    if (!should_download) {
-        if (!etag.empty() && etag != headers.etag) {
-            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
-            should_download = true;
-        } else if (!last_modified.empty() && last_modified != headers.last_modified) {
-            LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
-            should_download = true;
-        }
-    }
     if (should_download) {
         std::string path_temporary = path + ".downloadInProgress";
         if (file_exists) {
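Together with the should_download hoist in the previous hunk, this gives the two hubs different cache policies: the Hugging Face path keeps the HEAD request and re-downloads when the ETag or Last-Modified headers change, while the ModelScope path skips the HEAD request entirely and downloads only when no local file exists, so a stale local copy is reused as-is. A condensed, illustrative reduction of the two policies (the helper name is ours, not part of the commit):

// condensed decision logic mirroring the hunk above
static bool should_download_policy(bool use_modelscope, bool file_exists, bool force_download,
                                   bool etag_changed, bool last_modified_changed) {
    if (use_modelscope) {
        // no HEAD request was made, hence no revalidation against server headers
        return !file_exists;
    }
    return !file_exists || force_download || etag_changed || last_modified_changed;
}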
@@ -1507,6 +1521,20 @@ struct llama_model * common_load_model_from_hf(
     return common_load_model_from_url(model_url, local_path, hf_token, params);
 }

+struct llama_model * common_load_model_from_ms(
+        const std::string & repo,
+        const std::string & remote_path,
+        const std::string & local_path,
+        const std::string & ms_token,
+        const struct llama_model_params & params) {
+    std::string model_url = "https://" + MODELSCOPE_DOMAIN_DEFINITION + "/models/";
+    model_url += repo;
+    model_url += "/resolve/master/";
+    model_url += remote_path;
+    // ModelScope does not support passing the token in a header
+    return common_load_model_from_url(model_url, local_path, "", params);
+}
+
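As a usage sketch (repo and file names here are hypothetical, and MODELSCOPE_DOMAIN_DEFINITION is assumed to expand to a bare domain such as modelscope.cn), a call like

// common_load_model_from_ms("Qwen/Qwen2.5-1.5B-Instruct-GGUF",
//                           "qwen2.5-1.5b-instruct-q4_k_m.gguf", local_path, token, params);

would fetch https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/master/qwen2.5-1.5b-instruct-q4_k_m.gguf. Note that the token argument is deliberately dropped before delegating to common_load_model_from_url.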
 /**
  * Allow getting the HF file from the HF repo with tag (like ollama), for example:
  * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -1581,6 +1609,82 @@ std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_re
     return std::make_pair(hf_repo, gguf_file.at("rfilename"));
 }

+std::pair<std::string, std::string> common_get_ms_file(const std::string & ms_repo_with_tag, const std::string & ms_token) {
+    auto parts = string_split<std::string>(ms_repo_with_tag, ':');
+    std::string tag = parts.size() > 1 ? parts.back() : "Q4_K_M";
+    std::string ms_repo = parts[0];
+    if (string_split<std::string>(ms_repo, '/').size() != 2) {
+        throw std::invalid_argument("error: invalid MS repo format, expected <user>/<model>[:quant]\n");
+    }
+
+    // fetch model info from the ModelScope Hub API
+    json model_info;
+    curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
+    curl_slist_ptr http_headers;
+    std::string res_str;
+    auto endpoint = MODELSCOPE_DOMAIN_DEFINITION;
+
+    std::string url = "https://" + endpoint + "/api/v1/models/" + ms_repo + "/repo/files?Revision=master&Recursive=True";
+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
+    typedef size_t (*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
+    auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
+        static_cast<std::string *>(data)->append((char *) ptr, size * nmemb);
+        return size * nmemb;
+    };
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
+#if defined(_WIN32)
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+#endif
+    // mirror the HF code path: send a "llama-cpp" User-Agent and request JSON
+    http_headers.ptr = curl_slist_append(http_headers.ptr, "user-agent: llama-cpp");
+    http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
+    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
+
+    CURLcode res = curl_easy_perform(curl.get());
+
+    if (res != CURLE_OK) {
+        throw std::runtime_error("error: cannot make GET request to MS API");
+    }
+
+    long res_code;
+    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
+    if (res_code == 200) {
+        model_info = nlohmann::json::parse(res_str);
+    } else if (res_code == 401) {
+        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid MS token");
+    } else {
+        throw std::runtime_error(string_format("error from MS API, response code: %ld, data: %s", res_code, res_str.c_str()));
+    }
+
+    auto all_files = model_info["Data"]["Files"];
+
+    std::vector<std::string> all_available_files;
+    std::string gguf_file;
+    std::string upper_tag;
+    upper_tag.reserve(tag.size());
+    std::string lower_tag;
+    lower_tag.reserve(tag.size());
+    std::transform(tag.begin(), tag.end(), std::back_inserter(upper_tag), ::toupper);
+    std::transform(tag.begin(), tag.end(), std::back_inserter(lower_tag), ::tolower);
+    for (const auto & _file : all_files) {
+        auto file = _file["Path"].get<std::string>();
+        if (!string_ends_with(file, ".gguf")) {
+            continue;
+        }
+        if (file.find(upper_tag) != std::string::npos || file.find(lower_tag) != std::string::npos) {
+            gguf_file = file;
+        }
+        all_available_files.push_back(file);
+    }
+    if (gguf_file.empty()) {
+        gguf_file = all_available_files.at(0);
+    }
+
+    return std::make_pair(ms_repo, gguf_file);
+}
+
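The tag matching above is intentionally loose: the requested quant tag (default Q4_K_M) is compared against each .gguf path in all-uppercase and all-lowercase form, the last match wins, and if nothing matches the first GGUF file in the repo is used. One consequence is that a mixed-case substring in a filename (for example q4_K_m) matches neither variant. A minimal sketch of the matching rule in isolation (the helper name is ours):

#include <algorithm>
#include <string>

// true if `file` contains the tag in all-upper or all-lower form,
// mirroring the upper_tag/lower_tag check above
static bool ms_tag_matches(const std::string & file, const std::string & tag) {
    std::string upper(tag), lower(tag);
    std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper);
    std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
    return file.find(upper) != std::string::npos || file.find(lower) != std::string::npos;
}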
 #else

 struct llama_model * common_load_model_from_url(