From 21b5988713d89813b8dd862c65fc736fad5171c8 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 01:25:37 +0100 Subject: [PATCH 1/9] server: --offline mode also support OFFLINE=1 ./tests.sh for server tests --- common/arg.cpp | 292 +++++++++++++++++++----------------- common/common.h | 1 + tools/server/tests/utils.py | 2 + 3 files changed, 160 insertions(+), 135 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 62eec8337e033..bb8754c568f74 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma } // download one single file from remote URL to local path -static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token) { - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - if (!curl) { - LOG_ERR("%s: error initializing libcurl\n", __func__); - return false; - } - - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - // Check if hf-token or bearer-token was specified - if (!bearer_token.empty()) { - std::string auth_header = "Authorization: Bearer " + bearer_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - } - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - -#if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. Currently implemented under MS-Windows. - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - +static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) { // Check if the file already exists locally auto file_exists = std::filesystem::exists(path); @@ -298,6 +272,9 @@ static bool common_download_file_single(const std::string & url, const std::stri // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again) } else { LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); + if (offline) { + return false; + } } // Send a HEAD request to retrieve the etag and last-modified headers @@ -308,10 +285,37 @@ static bool common_download_file_single(const std::string & url, const std::stri common_load_model_from_url_headers headers; bool head_request_ok = false; - bool should_download = !file_exists; // by default, we should download if the file does not exist - - // get ETag to see if the remote file has changed + + if (!file_exists || !offline) { + bool should_download = !file_exists; // by default, we should download if the file does not exist + + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + if (!curl) { + LOG_ERR("%s: error initializing libcurl\n", __func__); + return false; + } + + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + // Check if hf-token or bearer-token was specified + if (!bearer_token.empty()) { + std::string auth_header = "Authorization: Bearer " + bearer_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + +#if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +#endif + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; @@ -354,105 +358,107 @@ static bool common_download_file_single(const std::string & url, const std::stri LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); head_request_ok = false; } - } - // if head_request_ok is false, we don't have the etag or last-modified headers - // we leave should_download as-is, which is true if the file does not exist - if (head_request_ok) { - // check if ETag or Last-Modified headers are different - // if it is, we need to download the file again - if (!etag.empty() && etag != headers.etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); - should_download = true; - } else if (!last_modified.empty() && last_modified != headers.last_modified) { - LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); - should_download = true; + // if head_request_ok is false, we don't have the etag or last-modified headers + // we leave should_download as-is, which is true if the file does not exist + if (head_request_ok) { + // check if ETag or Last-Modified headers are different + // if it is, we need to download the file again + if (!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); + should_download = true; + } else if (!last_modified.empty() && last_modified != headers.last_modified) { + LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); + should_download = true; + } } - } - if (should_download) { - std::string path_temporary = path + ".downloadInProgress"; - if (file_exists) { - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + if (should_download) { + std::string path_temporary = path + ".downloadInProgress"; + if (file_exists) { + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } } - } - // Set the output file + // Set the output file - struct FILE_deleter { - void operator()(FILE * f) const { - fclose(f); - } - }; + struct FILE_deleter { + void operator()(FILE * f) const { + fclose(f); + } + }; - std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); - return false; - } + std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + return false; + } - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); - auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd); - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd); + }; + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); - // display download progress - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + // display download progress + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); - // helper function to hide password in URL - auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { - std::size_t protocol_pos = url.find("://"); - if (protocol_pos == std::string::npos) { - return url; // Malformed URL - } + // helper function to hide password in URL + auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { + std::size_t protocol_pos = url.find("://"); + if (protocol_pos == std::string::npos) { + return url; // Malformed URL + } - std::size_t at_pos = url.find('@', protocol_pos + 3); - if (at_pos == std::string::npos) { - return url; // No password in URL - } + std::size_t at_pos = url.find('@', protocol_pos + 3); + if (at_pos == std::string::npos) { + return url; // No password in URL + } - return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); - }; + return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); + }; - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, - llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET"); - if (!was_perform_successful) { - return false; - } + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET"); + if (!was_perform_successful) { + return false; + } - long http_code = 0; - curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; - } + long http_code = 0; + curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } - // Causes file to be closed explicitly here before we rename it. - outfile.reset(); + // Causes file to be closed explicitly here before we rename it. + outfile.reset(); - // Write the updated JSON metadata file. - metadata.update({ - {"url", url}, - {"etag", headers.etag}, - {"lastModified", headers.last_modified} - }); - write_file(metadata_path, metadata.dump(4)); - LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + // Write the updated JSON metadata file. + metadata.update({ + {"url", url}, + {"etag", headers.etag}, + {"lastModified", headers.last_modified} + }); + write_file(metadata_path, metadata.dump(4)); + LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } else { + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); } } else { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); + LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); } return true; @@ -460,12 +466,12 @@ static bool common_download_file_single(const std::string & url, const std::stri // download multiple files from remote URLs to local paths // the input is a vector of pairs -static bool common_download_file_multiple(const std::vector> & urls, const std::string & bearer_token) { +static bool common_download_file_multiple(const std::vector> & urls, const std::string & bearer_token, bool offline) { // Prepare download in parallel std::vector> futures_download; for (auto const & item : urls) { - futures_download.push_back(std::async(std::launch::async, [bearer_token](const std::pair & it) -> bool { - return common_download_file_single(it.first, it.second, bearer_token); + futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair & it) -> bool { + return common_download_file_single(it.first, it.second, bearer_token, offline); }, item)); } @@ -481,14 +487,15 @@ static bool common_download_file_multiple(const std::vector> common_remote_get_content(const std::string & * * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. */ -static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) { +static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) { auto parts = string_split(hf_repo_with_tag, ':'); std::string tag = parts.size() > 1 ? parts.back() : "latest"; std::string hf_repo = parts[0]; @@ -638,20 +645,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_ long res_code = 0; std::string res_str; bool use_cache = false; - try { - auto res = common_remote_get_content(url, params); - res_code = res.first; - res_str = std::string(res.second.data(), res.second.size()); - } catch (const std::exception & e) { - LOG_WRN("error: failed to get manifest: %s\n", e.what()); - LOG_WRN("try reading from cache\n"); - // try to read from cache + if (!offline) { try { + auto res = common_remote_get_content(url, params); + res_code = res.first; + res_str = std::string(res.second.data(), res.second.size()); + } catch (const std::exception & e) { + LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what()); + } + } + if (res_code == 0) { + if (std::filesystem::exists(cached_response_path)) { + LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str()); res_str = read_file(cached_response_path); res_code = 200; use_cache = true; - } catch (const std::exception & e) { - throw std::runtime_error("error: failed to get manifest (check your internet connection)"); + } else { + throw std::runtime_error( + offline ? "error: failed to get manifest (offline mode)" + : "error: failed to get manifest (check your internet connection)"); } } std::string ggufFile; @@ -710,7 +722,8 @@ static bool common_download_file_multiple(const std::vector default it to --model if (model.hf_file.empty()) { if (model.path.empty()) { - auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token); + auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline); if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) { exit(1); // built without CURL, error message already printed } @@ -791,7 +805,7 @@ static handle_model_result common_params_handle_model( // then, download it if needed if (!model.url.empty()) { - bool ok = common_download_model(model, bearer_token); + bool ok = common_download_model(model, bearer_token, offline); if (!ok) { LOG_ERR("error: failed to download model from %s\n", model.url.c_str()); exit(1); @@ -934,7 +948,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // handle model and download { - auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH); + auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline); if (params.no_mmproj) { params.mmproj = {}; } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) { @@ -944,12 +958,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // only download mmproj if the current example is using it for (auto & ex : mmproj_examples) { if (ctx_arg.ex == ex) { - common_params_handle_model(params.mmproj, params.hf_token, ""); + common_params_handle_model(params.mmproj, params.hf_token, "", params.offline); break; } } - common_params_handle_model(params.speculative.model, params.hf_token, ""); - common_params_handle_model(params.vocoder.model, params.hf_token, ""); + common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline); + common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline); } if (params.escape) { @@ -2987,6 +3001,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex common_log_set_verbosity_thold(INT_MAX); } )); + add_opt(common_arg( + {"--offline"}, + "Offline mode: forces use of cache, prevents network access", + [](common_params & params) { + params.offline = true; + } + )); + add_opt(common_arg( {"-lv", "--verbosity", "--log-verbosity"}, "N", "Set the verbosity threshold. Messages with a higher verbosity will be ignored.", diff --git a/common/common.h b/common/common.h index f0c52c314b744..356afe193eff2 100644 --- a/common/common.h +++ b/common/common.h @@ -291,6 +291,7 @@ struct common_params { int32_t verbosity = 0; int32_t control_vector_layer_start = -1; // layer range for control vector int32_t control_vector_layer_end = -1; // layer range for control vector + bool offline = false; int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py index b480801b17abb..ae8ec21d48941 100644 --- a/tools/server/tests/utils.py +++ b/tools/server/tests/utils.py @@ -120,6 +120,8 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None: "--seed", self.seed, ] + if "OFFLINE" in os.environ: + server_args.append("--offline") if self.model_file: server_args.extend(["--model", self.model_file]) if self.model_url: From 273a72856f3c2851fa5f74201b551306cc6762d3 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 04:31:38 -0700 Subject: [PATCH 2/9] use LLAMA_OFFLINE env for --offline --- common/arg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index bb8754c568f74..8dc1271e7ab2d 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3007,7 +3007,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params) { params.offline = true; } - )); + ).set_env("LLAMA_OFFLINE")); add_opt(common_arg( {"-lv", "--verbosity", "--log-verbosity"}, "N", From bb213c6a373ab026f08b2a37ab7eb3b119308a81 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 08:18:35 -0700 Subject: [PATCH 3/9] fix signatures of ifdef'd function fallbacks Co-Authored-By: ochafik --- common/arg.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 55d1c7cfe1f74..8b2955c2e3a08 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -710,12 +710,12 @@ bool common_has_curl() { return false; } -static bool common_download_file_single(const std::string &, const std::string &, const std::string &) { +static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) { LOG_ERR("error: built without CURL, cannot download model from internet\n"); return false; } -static bool common_download_file_multiple(const std::vector> &, const std::string &) { +static bool common_download_file_multiple(const std::vector> &, const std::string &, bool) { LOG_ERR("error: built without CURL, cannot download model from the internet\n"); return false; } @@ -728,7 +728,7 @@ static bool common_download_model( return false; } -static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &) { +static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) { LOG_ERR("error: built without CURL, cannot download model from the internet\n"); return {}; } From f7439cbfa6db8f38387fc23db6c380af096b1614 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 08:31:00 -0700 Subject: [PATCH 4/9] nit spaces Co-Authored-By: ochafik --- common/arg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 8b2955c2e3a08..40491d0ca4657 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -285,7 +285,7 @@ static bool common_download_file_single(const std::string & url, const std::stri common_load_model_from_url_headers headers; bool head_request_ok = false; - + if (!file_exists || !offline) { bool should_download = !file_exists; // by default, we should download if the file does not exist @@ -3017,7 +3017,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.offline = true; } ).set_env("LLAMA_OFFLINE")); - + add_opt(common_arg( {"-lv", "--verbosity", "--log-verbosity"}, "N", "Set the verbosity threshold. Messages with a higher verbosity will be ignored.", From 31ea1b86c06e496a47ce9321e76516bd3098e8f5 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 09:09:09 -0700 Subject: [PATCH 5/9] Update arg.cpp Co-Authored-By: ochafik --- common/arg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index 40491d0ca4657..ebe205414d1de 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -723,7 +723,7 @@ static bool common_download_file_multiple(const std::vector Date: Mon, 26 May 2025 13:56:12 -0700 Subject: [PATCH 6/9] refactor as suggested Co-Authored-By: ochafik --- common/arg.cpp | 289 +++++++++++++++++++++++++------------------------ 1 file changed, 146 insertions(+), 143 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index ebe205414d1de..7de8a07c88b3d 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -283,182 +283,185 @@ static bool common_download_file_single(const std::string & url, const std::stri std::string last_modified; }; + if (offline) { + if (file_exists) { + LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); + return true; // skip verification/downloading + } + LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); + return false; + } + common_load_model_from_url_headers headers; bool head_request_ok = false; + bool should_download = !file_exists; // by default, we should download if the file does not exist - if (!file_exists || !offline) - { - bool should_download = !file_exists; // by default, we should download if the file does not exist - - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - if (!curl) { - LOG_ERR("%s: error initializing libcurl\n", __func__); - return false; - } + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + curl_slist_ptr http_headers; + if (!curl) { + LOG_ERR("%s: error initializing libcurl\n", __func__); + return false; + } - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - // Check if hf-token or bearer-token was specified - if (!bearer_token.empty()) { - std::string auth_header = "Authorization: Bearer " + bearer_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - } - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); + http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); + // Check if hf-token or bearer-token was specified + if (!bearer_token.empty()) { + std::string auth_header = "Authorization: Bearer " + bearer_token; + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + } + curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); #if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. Currently implemented under MS-Windows. - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); #endif - typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { - common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; + typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); + auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; - static std::regex header_regex("([^:]+): (.*)\r\n"); - static std::regex etag_regex("ETag", std::regex_constants::icase); - static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + static std::regex header_regex("([^:]+): (.*)\r\n"); + static std::regex etag_regex("ETag", std::regex_constants::icase); + static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); - std::string header(buffer, n_items); - std::smatch match; - if (std::regex_match(header, match, header_regex)) { - const std::string & key = match[1]; - const std::string & value = match[2]; - if (std::regex_match(key, match, etag_regex)) { - headers->etag = value; - } else if (std::regex_match(key, match, last_modified_regex)) { - headers->last_modified = value; - } + std::string header(buffer, n_items); + std::smatch match; + if (std::regex_match(header, match, header_regex)) { + const std::string & key = match[1]; + const std::string & value = match[2]; + if (std::regex_match(key, match, etag_regex)) { + headers->etag = value; + } else if (std::regex_match(key, match, last_modified_regex)) { + headers->last_modified = value; } - return n_items; - }; - - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress - curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); - curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - - // we only allow retrying once for HEAD requests - // this is for the use case of using running offline (no internet), retrying can be annoying - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD"); - if (!was_perform_successful) { - head_request_ok = false; } + return n_items; + }; - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code == 200) { - head_request_ok = true; - } else { - LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); - head_request_ok = false; - } - - // if head_request_ok is false, we don't have the etag or last-modified headers - // we leave should_download as-is, which is true if the file does not exist - if (head_request_ok) { - // check if ETag or Last-Modified headers are different - // if it is, we need to download the file again - if (!etag.empty() && etag != headers.etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); - should_download = true; - } else if (!last_modified.empty() && last_modified != headers.last_modified) { - LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); - should_download = true; - } - } - - if (should_download) { - std::string path_temporary = path + ".downloadInProgress"; - if (file_exists) { - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress + curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); + curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - // Set the output file + // we only allow retrying once for HEAD requests + // this is for the use case of using running offline (no internet), retrying can be annoying + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD"); + if (!was_perform_successful) { + head_request_ok = false; + } - struct FILE_deleter { - void operator()(FILE * f) const { - fclose(f); - } - }; + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code == 200) { + head_request_ok = true; + } else { + LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + head_request_ok = false; + } - std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + // if head_request_ok is false, we don't have the etag or last-modified headers + // we leave should_download as-is, which is true if the file does not exist + if (head_request_ok) { + // check if ETag or Last-Modified headers are different + // if it is, we need to download the file again + if (!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); + should_download = true; + } else if (!last_modified.empty() && last_modified != headers.last_modified) { + LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); + should_download = true; + } + } + + if (should_download) { + std::string path_temporary = path + ".downloadInProgress"; + if (file_exists) { + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); return false; } + } - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); - auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd); - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); + // Set the output file - // display download progress - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + struct FILE_deleter { + void operator()(FILE * f) const { + fclose(f); + } + }; - // helper function to hide password in URL - auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { - std::size_t protocol_pos = url.find("://"); - if (protocol_pos == std::string::npos) { - return url; // Malformed URL - } + std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + return false; + } - std::size_t at_pos = url.find('@', protocol_pos + 3); - if (at_pos == std::string::npos) { - return url; // No password in URL - } + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd); + }; + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); - return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); - }; + // display download progress + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, - llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET"); - if (!was_perform_successful) { - return false; + // helper function to hide password in URL + auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { + std::size_t protocol_pos = url.find("://"); + if (protocol_pos == std::string::npos) { + return url; // Malformed URL } - long http_code = 0; - curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; + std::size_t at_pos = url.find('@', protocol_pos + 3); + if (at_pos == std::string::npos) { + return url; // No password in URL } - // Causes file to be closed explicitly here before we rename it. - outfile.reset(); + return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); + }; - // Write the updated JSON metadata file. - metadata.update({ - {"url", url}, - {"etag", headers.etag}, - {"lastModified", headers.last_modified} - }); - write_file(metadata_path, metadata.dump(4)); - LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET"); + if (!was_perform_successful) { + return false; + } - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - } else { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); + long http_code = 0; + curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } + + // Causes file to be closed explicitly here before we rename it. + outfile.reset(); + + // Write the updated JSON metadata file. + metadata.update({ + {"url", url}, + {"etag", headers.etag}, + {"lastModified", headers.last_modified} + }); + write_file(metadata_path, metadata.dump(4)); + LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; } } else { - LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); } return true; From f8074b9d66cf0be761c6318f9001310b5c10e1f1 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 14:07:52 -0700 Subject: [PATCH 7/9] merge branches Co-Authored-By: ochafik --- common/arg.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 7de8a07c88b3d..001a87cfc1866 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -253,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri std::string last_modified; if (file_exists) { + if (offline) { + LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); + return true; // skip verification/downloading + } // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). std::ifstream metadata_in(metadata_path); if (metadata_in.good()) { @@ -271,10 +275,11 @@ static bool common_download_file_single(const std::string & url, const std::stri } // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again) } else { - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); if (offline) { + LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); return false; } + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); } // Send a HEAD request to retrieve the etag and last-modified headers @@ -283,15 +288,6 @@ static bool common_download_file_single(const std::string & url, const std::stri std::string last_modified; }; - if (offline) { - if (file_exists) { - LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return true; // skip verification/downloading - } - LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); - return false; - } - common_load_model_from_url_headers headers; bool head_request_ok = false; bool should_download = !file_exists; // by default, we should download if the file does not exist From 2eb3c69972d225e32786f0735802dc0092631c77 Mon Sep 17 00:00:00 2001 From: ochafik Date: Mon, 26 May 2025 14:32:50 -0700 Subject: [PATCH 8/9] rm OFFLINE from server tests (LLAMA_OFFLINE=1 works) Co-Authored-By: ochafik --- tools/server/tests/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py index 785fe99d09a63..f7e1b3b3b7b8e 100644 --- a/tools/server/tests/utils.py +++ b/tools/server/tests/utils.py @@ -121,8 +121,6 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None: "--seed", self.seed, ] - if "OFFLINE" in os.environ: - server_args.append("--offline") if self.model_file: server_args.extend(["--model", self.model_file]) if self.model_url: From 153a0b87705d2d92a2753be6be2671fc8454e572 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Mon, 26 May 2025 22:33:38 +0100 Subject: [PATCH 9/9] Update common/arg.cpp Co-authored-by: Xuan-Son Nguyen --- common/arg.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index 001a87cfc1866..69a58364f9b60 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3016,7 +3016,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.offline = true; } ).set_env("LLAMA_OFFLINE")); - add_opt(common_arg( {"-lv", "--verbosity", "--log-verbosity"}, "N", "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",