@@ -44,6 +44,25 @@ std::initializer_list<enum llama_example> mmproj_examples = {
4444 // TODO: add LLAMA_EXAMPLE_SERVER when it's ready
4545};
4646
47+ static std::string read_file (const std::string & fname) {
48+ std::ifstream file (fname);
49+ if (!file) {
50+ throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , fname.c_str ()));
51+ }
52+ std::string content ((std::istreambuf_iterator<char >(file)), std::istreambuf_iterator<char >());
53+ file.close ();
54+ return content;
55+ }
56+
57+ static void write_file (const std::string & fname, const std::string & content) {
58+ std::ofstream file (fname);
59+ if (!file) {
60+ throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , fname.c_str ()));
61+ }
62+ file << content;
63+ file.close ();
64+ }
65+
4766common_arg & common_arg::set_examples (std::initializer_list<enum llama_example> examples) {
4867 this ->examples = std::move (examples);
4968 return *this ;
@@ -201,9 +220,11 @@ struct curl_slist_ptr {
201220
202221static bool curl_perform_with_retry (const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
203222 int remaining_attempts = max_attempts;
223+ char * method = nullptr ;
224+ curl_easy_getinfo (curl, CURLINFO_EFFECTIVE_METHOD, &method);
204225
205226 while (remaining_attempts > 0 ) {
206- LOG_INF (" %s: Trying to download from %s (attempt %d of %d)...\n " , __func__ , url.c_str (), max_attempts - remaining_attempts + 1 , max_attempts);
227+ LOG_INF (" %s: %s %s (attempt %d of %d)...\n " , __func__ , method , url.c_str (), max_attempts - remaining_attempts + 1 , max_attempts);
207228
208229 CURLcode res = curl_easy_perform (curl);
209230 if (res == CURLE_OK) {
@@ -214,6 +235,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
214235 LOG_WRN (" %s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n " , __func__, curl_easy_strerror (res), exponential_backoff_delay);
215236
216237 remaining_attempts--;
238+ if (remaining_attempts == 0 ) break ;
217239 std::this_thread::sleep_for (std::chrono::milliseconds (exponential_backoff_delay));
218240 }
219241
@@ -232,8 +254,6 @@ static bool common_download_file_single(const std::string & url, const std::stri
232254 return false ;
233255 }
234256
235- bool force_download = false ;
236-
237257 // Set the URL, allow to follow http redirection
238258 curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
239259 curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
@@ -257,7 +277,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
257277
258278 // If the file exists, check its JSON metadata companion file.
259279 std::string metadata_path = path + " .json" ;
260- nlohmann::json metadata;
280+ nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
261281 std::string etag;
262282 std::string last_modified;
263283
@@ -267,7 +287,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
267287 if (metadata_in.good ()) {
268288 try {
269289 metadata_in >> metadata;
270- LOG_INF (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (), metadata.dump ().c_str ());
290+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (), metadata.dump ().c_str ());
271291 if (metadata.contains (" url" ) && metadata.at (" url" ).is_string ()) {
272292 auto previous_url = metadata.at (" url" ).get <std::string>();
273293 if (previous_url != url) {
@@ -297,7 +317,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
297317 };
298318
299319 common_load_model_from_url_headers headers;
320+ bool head_request_ok = false ;
321+ bool should_download = !file_exists; // by default, we should download if the file does not exist
300322
323+ // get ETag to see if the remote file has changed
301324 {
302325 typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
303326 auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
@@ -326,23 +349,28 @@ static bool common_download_file_single(const std::string & url, const std::stri
326349 curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
327350 curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
328351
329- bool was_perform_successful = curl_perform_with_retry (url, curl.get (), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
352+ // we only allow a single attempt (no retries) for HEAD requests
353+ // this is for the use case of running offline (no internet), where retrying can be annoying
354+ bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 );
330355 if (!was_perform_successful) {
331- return false ;
356+ head_request_ok = false ;
332357 }
333358
334359 long http_code = 0 ;
335360 curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
336- if (http_code ! = 200 ) {
337- // HEAD not supported, we don't know if the file has changed
338- // force trigger downloading
339- force_download = true ;
340- LOG_ERR ( " %s: HEAD invalid http status code received: %ld \n " , __func__, http_code) ;
361+ if (http_code = = 200 ) {
362+ head_request_ok = true ;
363+ } else {
364+ LOG_WRN ( " %s: HEAD invalid http status code received: %ld \n " , __func__, http_code) ;
365+ head_request_ok = false ;
341366 }
342367 }
343368
344- bool should_download = !file_exists || force_download;
345- if (!should_download) {
369+ // if head_request_ok is false, we don't have the etag or last-modified headers
370+ // we leave should_download as-is, which is true if the file does not exist
371+ if (head_request_ok) {
372+ // check if ETag or Last-Modified headers are different
373+ // if it is, we need to download the file again
346374 if (!etag.empty () && etag != headers.etag ) {
347375 LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__, etag.c_str (), headers.etag .c_str ());
348376 should_download = true ;
@@ -351,6 +379,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
351379 should_download = true ;
352380 }
353381 }
382+
354383 if (should_download) {
355384 std::string path_temporary = path + " .downloadInProgress" ;
356385 if (file_exists) {
@@ -425,13 +454,15 @@ static bool common_download_file_single(const std::string & url, const std::stri
425454 {" etag" , headers.etag },
426455 {" lastModified" , headers.last_modified }
427456 });
428- std::ofstream (metadata_path) << metadata.dump (4 );
429- LOG_INF (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
457+ write_file (metadata_path, metadata.dump (4 ) );
458+ LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
430459
431460 if (rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
432461 LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
433462 return false ;
434463 }
464+ } else {
465+ LOG_INF (" %s: using cached file: %s\n " , __func__, path.c_str ());
435466 }
436467
437468 return true ;
@@ -606,16 +637,37 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
606637 // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
607638 // User-Agent header is already set in common_remote_get_content, no need to set it here
608639
640+ // we use "=" to avoid clashing with other components, while still being allowed on Windows
641+ std::string cached_response_fname = " manifest=" + hf_repo + " =" + tag + " .json" ;
642+ string_replace_all (cached_response_fname, " /" , " _" );
643+ std::string cached_response_path = fs_get_cache_file (cached_response_fname);
644+
609645 // make the request
610646 common_remote_params params;
611647 params.headers = headers;
612- auto res = common_remote_get_content (url, params);
613- long res_code = res.first ;
614- std::string res_str (res.second .data (), res.second .size ());
648+ long res_code = 0 ;
649+ std::string res_str;
650+ bool use_cache = false ;
651+ try {
652+ auto res = common_remote_get_content (url, params);
653+ res_code = res.first ;
654+ res_str = std::string (res.second .data (), res.second .size ());
655+ } catch (const std::exception & e) {
656+ LOG_WRN (" error: failed to get manifest: %s\n " , e.what ());
657+ LOG_WRN (" try reading from cache\n " );
658+ // try to read from cache
659+ try {
660+ res_str = read_file (cached_response_path);
661+ res_code = 200 ;
662+ use_cache = true ;
663+ } catch (const std::exception & e) {
664+ throw std::runtime_error (" error: failed to get manifest (check your internet connection)" );
665+ }
666+ }
615667 std::string ggufFile;
616668 std::string mmprojFile;
617669
618- if (res_code == 200 ) {
670+ if (res_code == 200 || res_code == 304 ) {
619671 // extract ggufFile.rfilename in json, using regex
620672 {
621673 std::regex pattern (" \" ggufFile\" [\\ s\\ S]*?\" rfilename\"\\ s*:\\ s*\" ([^\" ]+)\" " );
@@ -632,6 +684,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
632684 mmprojFile = match[1 ].str ();
633685 }
634686 }
687+ if (!use_cache) {
688+ // if not using cached response, update the cache file
689+ write_file (cached_response_path, res_str);
690+ }
635691 } else if (res_code == 401 ) {
636692 throw std::runtime_error (" error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token" );
637693 } else {
@@ -1143,6 +1199,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
11431199 fprintf (stderr, " %s\n " , ex.what ());
11441200 ctx_arg.params = params_org;
11451201 return false ;
1202+ } catch (std::exception & ex) {
1203+ fprintf (stderr, " %s\n " , ex.what ());
1204+ exit (1 ); // for other exceptions, we exit with status code 1
11461205 }
11471206
11481207 return true ;
@@ -1443,13 +1502,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
14431502 {" -f" , " --file" }, " FNAME" ,
14441503 " a file containing the prompt (default: none)" ,
14451504 [](common_params & params, const std::string & value) {
1446- std::ifstream file (value);
1447- if (!file) {
1448- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1449- }
1505+ params.prompt = read_file (value);
14501506 // store the external file name in params
14511507 params.prompt_file = value;
1452- std::copy (std::istreambuf_iterator<char >(file), std::istreambuf_iterator<char >(), back_inserter (params.prompt ));
14531508 if (!params.prompt .empty () && params.prompt .back () == ' \n ' ) {
14541509 params.prompt .pop_back ();
14551510 }
@@ -1459,11 +1514,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
14591514 {" -sysf" , " --system-prompt-file" }, " FNAME" ,
14601515 " a file containing the system prompt (default: none)" ,
14611516 [](common_params & params, const std::string & value) {
1462- std::ifstream file (value);
1463- if (!file) {
1464- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1465- }
1466- std::copy (std::istreambuf_iterator<char >(file), std::istreambuf_iterator<char >(), back_inserter (params.system_prompt ));
1517+ params.system_prompt = read_file (value);
14671518 if (!params.system_prompt .empty () && params.system_prompt .back () == ' \n ' ) {
14681519 params.system_prompt .pop_back ();
14691520 }
@@ -1888,15 +1939,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
18881939 {" --grammar-file" }, " FNAME" ,
18891940 " file to read grammar from" ,
18901941 [](common_params & params, const std::string & value) {
1891- std::ifstream file (value);
1892- if (!file) {
1893- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
1894- }
1895- std::copy (
1896- std::istreambuf_iterator<char >(file),
1897- std::istreambuf_iterator<char >(),
1898- std::back_inserter (params.sampling .grammar )
1899- );
1942+ params.sampling .grammar = read_file (value);
19001943 }
19011944 ).set_sparam ());
19021945 add_opt (common_arg (
@@ -2816,14 +2859,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
28162859 " list of built-in templates:\n %s" , list_builtin_chat_templates ().c_str ()
28172860 ),
28182861 [](common_params & params, const std::string & value) {
2819- std::ifstream file (value);
2820- if (!file) {
2821- throw std::runtime_error (string_format (" error: failed to open file '%s'\n " , value.c_str ()));
2822- }
2823- std::copy (
2824- std::istreambuf_iterator<char >(file),
2825- std::istreambuf_iterator<char >(),
2826- std::back_inserter (params.chat_template ));
2862+ params.chat_template = read_file (value);
28272863 }
28282864 ).set_examples ({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_CHAT_TEMPLATE_FILE" ));
28292865 add_opt (common_arg (
0 commit comments