@@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
242242}
243243
244244// download one single file from remote URL to local path
245- static bool common_download_file_single (const std::string & url, const std::string & path, const std::string & bearer_token) {
246- // Initialize libcurl
247- curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
248- curl_slist_ptr http_headers;
249- if (!curl) {
250- LOG_ERR (" %s: error initializing libcurl\n " , __func__);
251- return false ;
252- }
253-
254- // Set the URL, allow to follow http redirection
255- curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
256- curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
257-
258- http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
259- // Check if hf-token or bearer-token was specified
260- if (!bearer_token.empty ()) {
261- std::string auth_header = " Authorization: Bearer " + bearer_token;
262- http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
263- }
264- curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
265-
266- #if defined(_WIN32)
267- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
268- // operating system. Currently implemented under MS-Windows.
269- curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
270- #endif
271-
245+ static bool common_download_file_single (const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
272246 // Check if the file already exists locally
273247 auto file_exists = std::filesystem::exists (path);
274248
@@ -279,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
279253 std::string last_modified;
280254
281255 if (file_exists) {
256+ if (offline) {
257+ LOG_INF (" %s: using cached file (offline mode): %s\n " , __func__, path.c_str ());
258+ return true ; // skip verification/downloading
259+ }
282260 // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
283261 std::ifstream metadata_in (metadata_path);
284262 if (metadata_in.good ()) {
@@ -297,6 +275,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
297275 }
298276 // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
299277 } else {
278+ if (offline) {
279+ LOG_ERR (" %s: required file is not available in cache (offline mode): %s\n " , __func__, path.c_str ());
280+ return false ;
281+ }
300282 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
301283 }
302284
@@ -310,50 +292,73 @@ static bool common_download_file_single(const std::string & url, const std::stri
310292 bool head_request_ok = false ;
311293 bool should_download = !file_exists; // by default, we should download if the file does not exist
312294
313- // get ETag to see if the remote file has changed
314- {
315- typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
316- auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
317- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
295+ // Initialize libcurl
296+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
297+ curl_slist_ptr http_headers;
298+ if (!curl) {
299+ LOG_ERR (" %s: error initializing libcurl\n " , __func__);
300+ return false ;
301+ }
302+
303+ // Set the URL, allow to follow http redirection
304+ curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
305+ curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
318306
319- static std::regex header_regex (" ([^:]+): (.*)\r\n " );
320- static std::regex etag_regex (" ETag" , std::regex_constants::icase);
321- static std::regex last_modified_regex (" Last-Modified" , std::regex_constants::icase);
307+ http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
308+ // Check if hf-token or bearer-token was specified
309+ if (!bearer_token.empty ()) {
310+ std::string auth_header = " Authorization: Bearer " + bearer_token;
311+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
312+ }
313+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
322314
323- std::string header (buffer, n_items);
324- std::smatch match;
325- if (std::regex_match (header, match, header_regex)) {
326- const std::string & key = match[1 ];
327- const std::string & value = match[2 ];
328- if (std::regex_match (key, match, etag_regex)) {
329- headers->etag = value;
330- } else if (std::regex_match (key, match, last_modified_regex)) {
331- headers->last_modified = value;
332- }
333- }
334- return n_items;
335- };
315+ #if defined(_WIN32)
316+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
317+ // operating system. Currently implemented under MS-Windows.
318+ curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
319+ #endif
336320
337- curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); // will trigger the HEAD verb
338- curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); // hide head request progress
339- curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
340- curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
321+ typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
322+ auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
323+ common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
341324
342- // we only allow retrying once for HEAD requests
343- // this is for the use case of using running offline (no internet), retrying can be annoying
344- bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD" );
345- if (!was_perform_successful) {
346- head_request_ok = false ;
347- }
325+ static std::regex header_regex (" ([^:]+): (.*)\r\n " );
326+ static std::regex etag_regex (" ETag" , std::regex_constants::icase);
327+ static std::regex last_modified_regex (" Last-Modified" , std::regex_constants::icase);
348328
349- long http_code = 0 ;
350- curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
351- if (http_code == 200 ) {
352- head_request_ok = true ;
353- } else {
354- LOG_WRN (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
355- head_request_ok = false ;
329+ std::string header (buffer, n_items);
330+ std::smatch match;
331+ if (std::regex_match (header, match, header_regex)) {
332+ const std::string & key = match[1 ];
333+ const std::string & value = match[2 ];
334+ if (std::regex_match (key, match, etag_regex)) {
335+ headers->etag = value;
336+ } else if (std::regex_match (key, match, last_modified_regex)) {
337+ headers->last_modified = value;
338+ }
356339 }
340+ return n_items;
341+ };
342+
343+ curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); // will trigger the HEAD verb
344+ curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); // hide head request progress
345+ curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
346+ curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
347+
348+ // we only allow retrying once for HEAD requests
349+ // this is for the use case of using running offline (no internet), retrying can be annoying
350+ bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD" );
351+ if (!was_perform_successful) {
352+ head_request_ok = false ;
353+ }
354+
355+ long http_code = 0 ;
356+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
357+ if (http_code == 200 ) {
358+ head_request_ok = true ;
359+ } else {
360+ LOG_WRN (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
361+ head_request_ok = false ;
357362 }
358363
359364 // if head_request_ok is false, we don't have the etag or last-modified headers
@@ -460,12 +465,12 @@ static bool common_download_file_single(const std::string & url, const std::stri
460465
461466// download multiple files from remote URLs to local paths
462467// the input is a vector of pairs <url, path>
463- static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token) {
468+ static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline ) {
464469 // Prepare download in parallel
465470 std::vector<std::future<bool >> futures_download;
466471 for (auto const & item : urls) {
467- futures_download.push_back (std::async (std::launch::async, [bearer_token](const std::pair<std::string, std::string> & it) -> bool {
468- return common_download_file_single (it.first , it.second , bearer_token);
472+ futures_download.push_back (std::async (std::launch::async, [bearer_token, offline ](const std::pair<std::string, std::string> & it) -> bool {
473+ return common_download_file_single (it.first , it.second , bearer_token, offline );
469474 }, item));
470475 }
471476
@@ -481,14 +486,15 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
481486
482487static bool common_download_model (
483488 const common_params_model & model,
484- const std::string & bearer_token) {
489+ const std::string & bearer_token,
490+ bool offline) {
485491 // Basic validation of the model.url
486492 if (model.url .empty ()) {
487493 LOG_ERR (" %s: invalid model url\n " , __func__);
488494 return false ;
489495 }
490496
491- if (!common_download_file_single (model.url , model.path , bearer_token)) {
497+ if (!common_download_file_single (model.url , model.path , bearer_token, offline )) {
492498 return false ;
493499 }
494500
@@ -547,7 +553,7 @@ static bool common_download_model(
547553 }
548554
549555 // Download in parallel
550- common_download_file_multiple (urls, bearer_token);
556+ common_download_file_multiple (urls, bearer_token, offline );
551557 }
552558
553559 return true ;
@@ -608,7 +614,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
608614 *
609615 * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
610616 */
611- static struct common_hf_file_res common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & bearer_token) {
617+ static struct common_hf_file_res common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline ) {
612618 auto parts = string_split<std::string>(hf_repo_with_tag, ' :' );
613619 std::string tag = parts.size () > 1 ? parts.back () : " latest" ;
614620 std::string hf_repo = parts[0 ];
@@ -638,20 +644,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
638644 long res_code = 0 ;
639645 std::string res_str;
640646 bool use_cache = false ;
641- try {
642- auto res = common_remote_get_content (url, params);
643- res_code = res.first ;
644- res_str = std::string (res.second .data (), res.second .size ());
645- } catch (const std::exception & e) {
646- LOG_WRN (" error: failed to get manifest: %s\n " , e.what ());
647- LOG_WRN (" try reading from cache\n " );
648- // try to read from cache
647+ if (!offline) {
649648 try {
649+ auto res = common_remote_get_content (url, params);
650+ res_code = res.first ;
651+ res_str = std::string (res.second .data (), res.second .size ());
652+ } catch (const std::exception & e) {
653+ LOG_WRN (" error: failed to get manifest at %s: %s\n " , url.c_str (), e.what ());
654+ }
655+ }
656+ if (res_code == 0 ) {
657+ if (std::filesystem::exists (cached_response_path)) {
658+ LOG_WRN (" trying to read manifest from cache: %s\n " , cached_response_path.c_str ());
650659 res_str = read_file (cached_response_path);
651660 res_code = 200 ;
652661 use_cache = true ;
653- } catch (const std::exception & e) {
654- throw std::runtime_error (" error: failed to get manifest (check your internet connection)" );
662+ } else {
663+ throw std::runtime_error (
664+ offline ? " error: failed to get manifest (offline mode)"
665+ : " error: failed to get manifest (check your internet connection)" );
655666 }
656667 }
657668 std::string ggufFile;
@@ -698,24 +709,25 @@ bool common_has_curl() {
698709 return false ;
699710}
700711
// Stub of common_download_file_single used, per its log message, when the
// binary is built without CURL. All parameters are unnamed and ignored
// (including the trailing bool added on the `+` signature line); it logs an
// error and always returns false.
701- static bool common_download_file_single (const std::string &, const std::string &, const std::string &) {
712+ static bool common_download_file_single (const std::string &, const std::string &, const std::string &, bool ) {
702713 LOG_ERR (" error: built without CURL, cannot download model from internet\n " );
703714 return false ;
704715}
705716
// Stub of common_download_file_multiple used, per its log message, when the
// binary is built without CURL. The <url, path> vector, the token string and
// the trailing bool are all unnamed and ignored; it logs an error and always
// returns false.
706- static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> &, const std::string &) {
717+ static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool ) {
707718 LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
708719 return false ;
709720}
710721
// Stub of common_download_model used, per its log message, when the binary is
// built without CURL. No argument is inspected (all parameters are unnamed);
// it logs an error and always returns false.
711722static bool common_download_model (
712723 const common_params_model &,
713- const std::string &) {
724+ const std::string &,
725+ bool ) {
714726 LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
715727 return false ;
716728}
717729
// Stub of common_get_hf_file used, per its log message, when the binary is
// built without CURL. All parameters are unnamed and ignored; it logs an error
// and returns a value-initialized common_hf_file_res (`{}`).
// NOTE(review): the caller in common_params_handle_model exits when the
// returned repo/ggufFile are empty — presumably `{}` leaves them empty; confirm
// against the struct definition.
718- static struct common_hf_file_res common_get_hf_file (const std::string &, const std::string &) {
730+ static struct common_hf_file_res common_get_hf_file (const std::string &, const std::string &, bool ) {
719731 LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
720732 return {};
721733}
@@ -742,15 +754,16 @@ struct handle_model_result {
742754static handle_model_result common_params_handle_model (
743755 struct common_params_model & model,
744756 const std::string & bearer_token,
745- const std::string & model_path_default) {
757+ const std::string & model_path_default,
758+ bool offline) {
746759 handle_model_result result;
747760 // handle pre-fill default model path and url based on hf_repo and hf_file
748761 {
749762 if (!model.hf_repo .empty ()) {
750763 // short-hand to avoid specifying --hf-file -> default it to --model
751764 if (model.hf_file .empty ()) {
752765 if (model.path .empty ()) {
753- auto auto_detected = common_get_hf_file (model.hf_repo , bearer_token);
766+ auto auto_detected = common_get_hf_file (model.hf_repo , bearer_token, offline );
754767 if (auto_detected.repo .empty () || auto_detected.ggufFile .empty ()) {
755768 exit (1 ); // built without CURL, error message already printed
756769 }
@@ -791,7 +804,7 @@ static handle_model_result common_params_handle_model(
791804
792805 // then, download it if needed
793806 if (!model.url .empty ()) {
794- bool ok = common_download_model (model, bearer_token);
807+ bool ok = common_download_model (model, bearer_token, offline );
795808 if (!ok) {
796809 LOG_ERR (" error: failed to download model from %s\n " , model.url .c_str ());
797810 exit (1 );
@@ -934,7 +947,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
934947
935948 // handle model and download
936949 {
937- auto res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH);
950+ auto res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH, params. offline );
938951 if (params.no_mmproj ) {
939952 params.mmproj = {};
940953 } else if (res.found_mmproj && params.mmproj .path .empty () && params.mmproj .url .empty ()) {
@@ -944,12 +957,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
944957 // only download mmproj if the current example is using it
945958 for (auto & ex : mmproj_examples) {
946959 if (ctx_arg.ex == ex) {
947- common_params_handle_model (params.mmproj , params.hf_token , " " );
960+ common_params_handle_model (params.mmproj , params.hf_token , " " , params. offline );
948961 break ;
949962 }
950963 }
951- common_params_handle_model (params.speculative .model , params.hf_token , " " );
952- common_params_handle_model (params.vocoder .model , params.hf_token , " " );
964+ common_params_handle_model (params.speculative .model , params.hf_token , " " , params. offline );
965+ common_params_handle_model (params.vocoder .model , params.hf_token , " " , params. offline );
953966 }
954967
955968 if (params.escape ) {
@@ -2996,6 +3009,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29963009 common_log_set_verbosity_thold (INT_MAX);
29973010 }
29983011 ));
3012+ add_opt (common_arg (
3013+ {" --offline" },
3014+ " Offline mode: forces use of cache, prevents network access" ,
3015+ [](common_params & params) {
3016+ params.offline = true ;
3017+ }
3018+ ).set_env (" LLAMA_OFFLINE" ));
29993019 add_opt (common_arg (
30003020 {" -lv" , " --verbosity" , " --log-verbosity" }, " N" ,
30013021 " Set the verbosity threshold. Messages with a higher verbosity will be ignored." ,
0 commit comments