@@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
242242}
243243
244244//  download one single file from remote URL to local path
245- static  bool  common_download_file_single (const  std::string & url, const  std::string & path, const  std::string & bearer_token) {
246-     //  Initialize libcurl
247-     curl_ptr       curl (curl_easy_init (), &curl_easy_cleanup);
248-     curl_slist_ptr http_headers;
249-     if  (!curl) {
250-         LOG_ERR (" %s: error initializing libcurl\n "  , __func__);
251-         return  false ;
252-     }
253- 
254-     //  Set the URL, allow to follow http redirection
255-     curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
256-     curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
257- 
258-     http_headers.ptr  = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp"  );
259-     //  Check if hf-token or bearer-token was specified
260-     if  (!bearer_token.empty ()) {
261-         std::string auth_header = " Authorization: Bearer "   + bearer_token;
262-         http_headers.ptr  = curl_slist_append (http_headers.ptr , auth_header.c_str ());
263-     }
264-     curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
265- 
266- #if  defined(_WIN32)
267-     //  CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
268-     //    operating system. Currently implemented under MS-Windows.
269-     curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
270- #endif 
271- 
245+ static  bool  common_download_file_single (const  std::string & url, const  std::string & path, const  std::string & bearer_token, bool  offline) {
272246    //  Check if the file already exists locally
273247    auto  file_exists = std::filesystem::exists (path);
274248
@@ -279,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
279253    std::string last_modified;
280254
281255    if  (file_exists) {
256+         if  (offline) {
257+             LOG_INF (" %s: using cached file (offline mode): %s\n "  , __func__, path.c_str ());
258+             return  true ; //  skip verification/downloading
259+         }
282260        //  Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
283261        std::ifstream metadata_in (metadata_path);
284262        if  (metadata_in.good ()) {
@@ -297,6 +275,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
297275        }
298276        //  if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
299277    } else  {
278+         if  (offline) {
279+             LOG_ERR (" %s: required file is not available in cache (offline mode): %s\n "  , __func__, path.c_str ());
280+             return  false ;
281+         }
300282        LOG_INF (" %s: no previous model file found %s\n "  , __func__, path.c_str ());
301283    }
302284
@@ -310,50 +292,73 @@ static bool common_download_file_single(const std::string & url, const std::stri
310292    bool  head_request_ok = false ;
311293    bool  should_download = !file_exists; //  by default, we should download if the file does not exist
312294
313-     //  get ETag to see if the remote file has changed
314-     {
315-         typedef  size_t (*CURLOPT_HEADERFUNCTION_PTR)(char  *, size_t , size_t , void  *);
316-         auto  header_callback = [](char  * buffer, size_t  /* size*/  , size_t  n_items, void  * userdata) -> size_t  {
317-             common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
295+     //  Initialize libcurl
296+     curl_ptr       curl (curl_easy_init (), &curl_easy_cleanup);
297+     curl_slist_ptr http_headers;
298+     if  (!curl) {
299+         LOG_ERR (" %s: error initializing libcurl\n "  , __func__);
300+         return  false ;
301+     }
302+ 
303+     //  Set the URL, allow to follow http redirection
304+     curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
305+     curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
318306
319-             static  std::regex header_regex (" ([^:]+): (.*)\r\n "  );
320-             static  std::regex etag_regex (" ETag"  , std::regex_constants::icase);
321-             static  std::regex last_modified_regex (" Last-Modified"  , std::regex_constants::icase);
307+     http_headers.ptr  = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp"  );
308+     //  Check if hf-token or bearer-token was specified
309+     if  (!bearer_token.empty ()) {
310+         std::string auth_header = " Authorization: Bearer "   + bearer_token;
311+         http_headers.ptr  = curl_slist_append (http_headers.ptr , auth_header.c_str ());
312+     }
313+     curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
322314
323-             std::string header (buffer, n_items);
324-             std::smatch match;
325-             if  (std::regex_match (header, match, header_regex)) {
326-                 const  std::string & key = match[1 ];
327-                 const  std::string & value = match[2 ];
328-                 if  (std::regex_match (key, match, etag_regex)) {
329-                     headers->etag  = value;
330-                 } else  if  (std::regex_match (key, match, last_modified_regex)) {
331-                     headers->last_modified  = value;
332-                 }
333-             }
334-             return  n_items;
335-         };
315+ #if  defined(_WIN32)
316+     //  CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
317+     //    operating system. Currently implemented under MS-Windows.
318+     curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
319+ #endif 
336320
337-         curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); //  will trigger the HEAD verb
338-         curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); //  hide head request progress
339-         curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
340-         curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
321+     typedef  size_t (*CURLOPT_HEADERFUNCTION_PTR)(char  *, size_t , size_t , void  *);
322+     auto  header_callback = [](char  * buffer, size_t  /* size*/  , size_t  n_items, void  * userdata) -> size_t  {
323+         common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
341324
342-         //  we only allow retrying once for HEAD requests
343-         //  this is for the use case of using running offline (no internet), retrying can be annoying
344-         bool  was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD"  );
345-         if  (!was_perform_successful) {
346-             head_request_ok = false ;
347-         }
325+         static  std::regex header_regex (" ([^:]+): (.*)\r\n "  );
326+         static  std::regex etag_regex (" ETag"  , std::regex_constants::icase);
327+         static  std::regex last_modified_regex (" Last-Modified"  , std::regex_constants::icase);
348328
349-         long  http_code = 0 ;
350-         curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
351-         if  (http_code == 200 ) {
352-             head_request_ok = true ;
353-         } else  {
354-             LOG_WRN (" %s: HEAD invalid http status code received: %ld\n "  , __func__, http_code);
355-             head_request_ok = false ;
329+         std::string header (buffer, n_items);
330+         std::smatch match;
331+         if  (std::regex_match (header, match, header_regex)) {
332+             const  std::string & key = match[1 ];
333+             const  std::string & value = match[2 ];
334+             if  (std::regex_match (key, match, etag_regex)) {
335+                 headers->etag  = value;
336+             } else  if  (std::regex_match (key, match, last_modified_regex)) {
337+                 headers->last_modified  = value;
338+             }
356339        }
340+         return  n_items;
341+     };
342+ 
343+     curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); //  will trigger the HEAD verb
344+     curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); //  hide head request progress
345+     curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
346+     curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
347+ 
348+     //  we only allow retrying once for HEAD requests
349+     //  this is for the use case of using running offline (no internet), retrying can be annoying
350+     bool  was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD"  );
351+     if  (!was_perform_successful) {
352+         head_request_ok = false ;
353+     }
354+ 
355+     long  http_code = 0 ;
356+     curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
357+     if  (http_code == 200 ) {
358+         head_request_ok = true ;
359+     } else  {
360+         LOG_WRN (" %s: HEAD invalid http status code received: %ld\n "  , __func__, http_code);
361+         head_request_ok = false ;
357362    }
358363
359364    //  if head_request_ok is false, we don't have the etag or last-modified headers
@@ -460,12 +465,12 @@ static bool common_download_file_single(const std::string & url, const std::stri
460465
461466//  download multiple files from remote URLs to local paths
462467//  the input is a vector of pairs <url, path>
463- static  bool  common_download_file_multiple (const  std::vector<std::pair<std::string, std::string>> & urls, const  std::string & bearer_token) {
468+ static  bool  common_download_file_multiple (const  std::vector<std::pair<std::string, std::string>> & urls, const  std::string & bearer_token,  bool  offline ) {
464469    //  Prepare download in parallel
465470    std::vector<std::future<bool >> futures_download;
466471    for  (auto  const  & item : urls) {
467-         futures_download.push_back (std::async (std::launch::async, [bearer_token](const  std::pair<std::string, std::string> & it) -> bool  {
468-             return  common_download_file_single (it.first , it.second , bearer_token);
472+         futures_download.push_back (std::async (std::launch::async, [bearer_token, offline ](const  std::pair<std::string, std::string> & it) -> bool  {
473+             return  common_download_file_single (it.first , it.second , bearer_token, offline );
469474        }, item));
470475    }
471476
@@ -481,14 +486,15 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
481486
482487static  bool  common_download_model (
483488        const  common_params_model & model,
484-         const  std::string & bearer_token) {
489+         const  std::string & bearer_token,
490+         bool  offline) {
485491    //  Basic validation of the model.url
486492    if  (model.url .empty ()) {
487493        LOG_ERR (" %s: invalid model url\n "  , __func__);
488494        return  false ;
489495    }
490496
491-     if  (!common_download_file_single (model.url , model.path , bearer_token)) {
497+     if  (!common_download_file_single (model.url , model.path , bearer_token, offline )) {
492498        return  false ;
493499    }
494500
@@ -547,7 +553,7 @@ static bool common_download_model(
547553        }
548554
549555        //  Download in parallel
550-         common_download_file_multiple (urls, bearer_token);
556+         common_download_file_multiple (urls, bearer_token, offline );
551557    }
552558
553559    return  true ;
@@ -608,7 +614,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
608614 * 
609615 * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. 
610616 */  
611- static  struct  common_hf_file_res  common_get_hf_file (const  std::string & hf_repo_with_tag, const  std::string & bearer_token) {
617+ static  struct  common_hf_file_res  common_get_hf_file (const  std::string & hf_repo_with_tag, const  std::string & bearer_token,  bool  offline ) {
612618    auto  parts = string_split<std::string>(hf_repo_with_tag, ' :'  );
613619    std::string tag = parts.size () > 1  ? parts.back () : " latest"  ;
614620    std::string hf_repo = parts[0 ];
@@ -638,20 +644,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
638644    long  res_code = 0 ;
639645    std::string res_str;
640646    bool  use_cache = false ;
641-     try  {
642-         auto  res = common_remote_get_content (url, params);
643-         res_code = res.first ;
644-         res_str = std::string (res.second .data (), res.second .size ());
645-     } catch  (const  std::exception & e) {
646-         LOG_WRN (" error: failed to get manifest: %s\n "  , e.what ());
647-         LOG_WRN (" try reading from cache\n "  );
648-         //  try to read from cache
647+     if  (!offline) {
649648        try  {
649+             auto  res = common_remote_get_content (url, params);
650+             res_code = res.first ;
651+             res_str = std::string (res.second .data (), res.second .size ());
652+         } catch  (const  std::exception & e) {
653+             LOG_WRN (" error: failed to get manifest at %s: %s\n "  , url.c_str (), e.what ());
654+         }
655+     }
656+     if  (res_code == 0 ) {
657+         if  (std::filesystem::exists (cached_response_path)) {
658+             LOG_WRN (" trying to read manifest from cache: %s\n "  , cached_response_path.c_str ());
650659            res_str = read_file (cached_response_path);
651660            res_code = 200 ;
652661            use_cache = true ;
653-         } catch  (const  std::exception & e) {
654-             throw  std::runtime_error (" error: failed to get manifest (check your internet connection)"  );
662+         } else  {
663+             throw  std::runtime_error (
664+                 offline ? " error: failed to get manifest (offline mode)" 
665+                 : " error: failed to get manifest (check your internet connection)"  );
655666        }
656667    }
657668    std::string ggufFile;
@@ -698,24 +709,25 @@ bool common_has_curl() {
698709    return  false ;
699710}
700711
// Fallback definition of common_download_file_single; presumably the variant compiled when
// CURL support is unavailable (the log message below says "built without CURL").
// The parameter list mirrors the CURL-enabled function (url, path, bearer token, offline flag),
// but every parameter is unnamed and ignored.
// Returns: always false, after logging an error.
// NOTE(review): the offline flag is ignored here, so this returns false regardless of whether
// the requested file is already present on disk - confirm that this is intended.
701- static  bool  common_download_file_single (const  std::string &, const  std::string &, const  std::string &) {
712+ static  bool  common_download_file_single (const  std::string &, const  std::string &, const  std::string &,  bool ) {
702713    LOG_ERR (" error: built without CURL, cannot download model from internet\n "  );
703714    return  false ; // unconditional failure: nothing is downloaded or checked
704715}
705716
// Fallback definition of common_download_file_multiple; presumably the variant compiled when
// CURL support is unavailable (the log message below says "built without CURL").
// The parameter list mirrors the CURL-enabled function (vector of <url, path> pairs, bearer
// token, offline flag), but every parameter is unnamed and ignored.
// Returns: always false, after logging an error.
706- static  bool  common_download_file_multiple (const  std::vector<std::pair<std::string, std::string>> &, const  std::string &) {
717+ static  bool  common_download_file_multiple (const  std::vector<std::pair<std::string, std::string>> &, const  std::string &,  bool ) {
707718    LOG_ERR (" error: built without CURL, cannot download model from the internet\n "  );
708719    return  false ; // unconditional failure: no download is attempted
709720}
710721
// Fallback definition of common_download_model; presumably the variant compiled when
// CURL support is unavailable (the log message below says "built without CURL").
// The parameter list mirrors the CURL-enabled function (model description, bearer token,
// offline flag), but every parameter is unnamed and ignored.
// Returns: always false, after logging an error.
711723static  bool  common_download_model (
712724        const  common_params_model &,
713-         const  std::string &) {
724+         const  std::string &,
725+         bool ) {
714726    LOG_ERR (" error: built without CURL, cannot download model from the internet\n "  );
715727    return  false ; // unconditional failure: the model is never fetched
716728}
717729
// Fallback definition of common_get_hf_file; presumably the variant compiled when
// CURL support is unavailable (the log message below says "built without CURL").
// The parameter list mirrors the CURL-enabled function (repo-with-tag string, bearer token,
// offline flag), but every parameter is unnamed and ignored.
// Returns: a value-initialized common_hf_file_res, after logging an error.
718- static  struct  common_hf_file_res  common_get_hf_file (const  std::string &, const  std::string &) {
730+ static  struct  common_hf_file_res  common_get_hf_file (const  std::string &, const  std::string &,  bool ) {
719731    LOG_ERR (" error: built without CURL, cannot download model from the internet\n "  );
    // The caller in common_params_handle_model checks for empty repo / ggufFile fields and
    // exits, relying on the error message having been printed above.
720732    return  {};
721733}
@@ -742,15 +754,16 @@ struct handle_model_result {
742754static  handle_model_result common_params_handle_model (
743755        struct  common_params_model  & model,
744756        const  std::string & bearer_token,
745-         const  std::string & model_path_default) {
757+         const  std::string & model_path_default,
758+         bool  offline) {
746759    handle_model_result result;
747760    //  handle pre-fill default model path and url based on hf_repo and hf_file
748761    {
749762        if  (!model.hf_repo .empty ()) {
750763            //  short-hand to avoid specifying --hf-file -> default it to --model
751764            if  (model.hf_file .empty ()) {
752765                if  (model.path .empty ()) {
753-                     auto  auto_detected = common_get_hf_file (model.hf_repo , bearer_token);
766+                     auto  auto_detected = common_get_hf_file (model.hf_repo , bearer_token, offline );
754767                    if  (auto_detected.repo .empty () || auto_detected.ggufFile .empty ()) {
755768                        exit (1 ); //  built without CURL, error message already printed
756769                    }
@@ -791,7 +804,7 @@ static handle_model_result common_params_handle_model(
791804
792805    //  then, download it if needed
793806    if  (!model.url .empty ()) {
794-         bool  ok = common_download_model (model, bearer_token);
807+         bool  ok = common_download_model (model, bearer_token, offline );
795808        if  (!ok) {
796809            LOG_ERR (" error: failed to download model from %s\n "  , model.url .c_str ());
797810            exit (1 );
@@ -934,7 +947,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
934947
935948    //  handle model and download
936949    {
937-         auto  res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH);
950+         auto  res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH, params. offline );
938951        if  (params.no_mmproj ) {
939952            params.mmproj  = {};
940953        } else  if  (res.found_mmproj  && params.mmproj .path .empty () && params.mmproj .url .empty ()) {
@@ -944,12 +957,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
944957        //  only download mmproj if the current example is using it
945958        for  (auto  & ex : mmproj_examples) {
946959            if  (ctx_arg.ex  == ex) {
947-                 common_params_handle_model (params.mmproj ,    params.hf_token , " "  );
960+                 common_params_handle_model (params.mmproj ,    params.hf_token , " " , params. offline );
948961                break ;
949962            }
950963        }
951-         common_params_handle_model (params.speculative .model , params.hf_token , " "  );
952-         common_params_handle_model (params.vocoder .model ,     params.hf_token , " "  );
964+         common_params_handle_model (params.speculative .model , params.hf_token , " " , params. offline );
965+         common_params_handle_model (params.vocoder .model ,     params.hf_token , " " , params. offline );
953966    }
954967
955968    if  (params.escape ) {
@@ -2996,6 +3009,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29963009            common_log_set_verbosity_thold (INT_MAX);
29973010        }
29983011    ));
// Registers the offline-mode command-line flag. Its handler only sets params.offline;
// that value is later passed to common_params_handle_model during argument parsing.
3012+     add_opt (common_arg (
3013+         {" --offline"  },
3014+         " Offline mode: forces use of cache, prevents network access"  ,
3015+         [](common_params & params) {
3016+             params.offline  = true ; // flag takes no value: presence alone enables offline mode
3017+         }
3018+     ).set_env (" LLAMA_OFFLINE"  )); // presumably lets the LLAMA_OFFLINE environment variable enable the same option - TODO confirm set_env semantics
29993019    add_opt (common_arg (
30003020        {" -lv"  , " --verbosity"  , " --log-verbosity"  }, " N"  ,
30013021        " Set the verbosity threshold. Messages with a higher verbosity will be ignored."  ,
0 commit comments