3939#if defined(LLAMA_USE_CURL)
4040#include < curl/curl.h>
4141#include < curl/easy.h>
42- #else
42+ #elif defined(LLAMA_USE_HTTPLIB)
4343#include < cpp-httplib/httplib.h>
4444#endif
4545
@@ -219,6 +219,53 @@ struct common_hf_file_res {
219219 std::string mmprojFile;
220220};
221221
222+ static void write_etag (const std::string & path, const std::string & etag) {
223+ const std::string etag_path = path + " .etag" ;
224+ write_file (etag_path, etag);
225+ LOG_DBG (" %s: file etag saved: %s\n " , __func__, etag_path.c_str ());
226+ }
227+
228+ static std::string read_etag (const std::string & path) {
229+ std::string none;
230+ const std::string etag_path = path + " .etag" ;
231+
232+ if (std::filesystem::exists (etag_path)) {
233+ std::ifstream etag_in (etag_path);
234+ if (!etag_in) {
235+ LOG_ERR (" %s: could not open .etag file for reading: %s\n " , __func__, etag_path.c_str ());
236+ return none;
237+ }
238+ std::string etag;
239+ std::getline (etag_in, etag);
240+ return etag;
241+ }
242+
243+ // no etag file, but maybe there is an old .json
244+ // remove this code later
245+ const std::string metadata_path = path + " .json" ;
246+
247+ if (std::filesystem::exists (metadata_path)) {
248+ std::ifstream metadata_in (metadata_path);
249+ try {
250+ nlohmann::json metadata_json;
251+ metadata_in >> metadata_json;
252+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
253+ metadata_json.dump ().c_str ());
254+ if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
255+ std::string etag = metadata_json.at (" etag" );
256+ write_etag (path, etag);
257+ if (!std::filesystem::remove (metadata_path)) {
258+ LOG_WRN (" %s: failed to delete old .json metadata file: %s\n " , __func__, metadata_path.c_str ());
259+ }
260+ return etag;
261+ }
262+ } catch (const nlohmann::json::exception & e) {
263+ LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
264+ }
265+ }
266+ return none;
267+ }
268+
222269#ifdef LLAMA_USE_CURL
223270
224271bool common_has_curl () {
@@ -375,36 +422,15 @@ static bool common_download_head(CURL * curl,
375422static bool common_download_file_single_online (const std::string & url,
376423 const std::string & path,
377424 const std::string & bearer_token) {
378- // If the file exists, check its JSON metadata companion file.
379- std::string metadata_path = path + " .json" ;
380425 static const int max_attempts = 3 ;
381426 static const int retry_delay_seconds = 2 ;
382427 for (int i = 0 ; i < max_attempts; ++i) {
383- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
384- std::string etag;
385- std::string last_modified;
428+ std::string etag;
386429
387430 // Check if the file already exists locally
388431 const auto file_exists = std::filesystem::exists (path);
389432 if (file_exists) {
390- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
391- std::ifstream metadata_in (metadata_path);
392- if (metadata_in.good ()) {
393- try {
394- metadata_in >> metadata;
395- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
396- metadata.dump ().c_str ());
397- if (metadata.contains (" etag" ) && metadata.at (" etag" ).is_string ()) {
398- etag = metadata.at (" etag" );
399- }
400- if (metadata.contains (" lastModified" ) && metadata.at (" lastModified" ).is_string ()) {
401- last_modified = metadata.at (" lastModified" );
402- }
403- } catch (const nlohmann::json::exception & e) {
404- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
405- }
406- }
407- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
433+ etag = read_etag (path);
408434 } else {
409435 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
410436 }
@@ -442,11 +468,6 @@ static bool common_download_file_single_online(const std::string & url,
442468 headers.etag .c_str ());
443469 should_download = true ;
444470 should_download_from_scratch = true ;
445- } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
446- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
447- last_modified.c_str (), headers.last_modified .c_str ());
448- should_download = true ;
449- should_download_from_scratch = true ;
450471 }
451472 }
452473
@@ -477,15 +498,9 @@ static bool common_download_file_single_online(const std::string & url,
477498 }
478499 }
479500 }
480-
481- // Write the updated JSON metadata file.
482- metadata.update ({
483- { " url" , url },
484- { " etag" , headers.etag },
485- { " lastModified" , headers.last_modified }
486- });
487- write_file (metadata_path, metadata.dump (4 ));
488- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
501+ if (head_request_ok) {
502+ write_etag (path, headers.etag );
503+ }
489504
490505 // start the download
491506 LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
@@ -572,6 +587,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
572587
573588#else
574589
590+ #ifdef LLAMA_USE_HTTPLIB
591+
// Variant compiled in the non-curl branch: downloads here do not go through
// libcurl, so the answer is always "no".
bool common_has_curl() { return false; }
@@ -666,51 +683,6 @@ static void print_progress(size_t current, size_t total) { // TODO isatty
666683 std::cout.flush ();
667684}
668685
669- struct common_file_metadata {
670- std::string etag;
671- std::string last_modified;
672- };
673-
674- static std::optional<common_file_metadata> read_metadata (const std::string & path) {
675- if (!std::filesystem::exists (path)) {
676- return std::nullopt ;
677- }
678-
679- nlohmann::json metadata_json;
680- common_file_metadata metadata;
681-
682- std::ifstream metadata_in (path);
683- try {
684- metadata_in >> metadata_json;
685- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, path.c_str (),
686- metadata_json.dump ().c_str ());
687- if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
688- metadata.etag = metadata_json.at (" etag" );
689- }
690- if (metadata_json.contains (" lastModified" ) && metadata_json.at (" lastModified" ).is_string ()) {
691- metadata.last_modified = metadata_json.at (" lastModified" );
692- }
693- } catch (const nlohmann::json::exception & e) {
694- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, path.c_str (), e.what ());
695- return std::nullopt ;
696- }
697-
698- return metadata;
699- }
700-
701- static void write_metadata (const std::string & path,
702- const std::string & url,
703- const common_file_metadata & metadata) {
704- nlohmann::json metadata_json = {
705- { " url" , url },
706- { " etag" , metadata.etag },
707- { " lastModified" , metadata.last_modified }
708- };
709-
710- write_file (path, metadata_json.dump (4 ));
711- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, path.c_str ());
712- }
713-
714686static bool common_pull_file (httplib::Client & cli,
715687 const std::string & resolve_path,
716688 const std::string & path_tmp,
@@ -777,8 +749,6 @@ static bool common_pull_file(httplib::Client & cli,
777749static bool common_download_file_single_online (const std::string & url,
778750 const std::string & path,
779751 const std::string & bearer_token) {
780- // If the file exists, check its JSON metadata companion file.
781- std::string metadata_path = path + " .json" ;
782752 static const int max_attempts = 3 ;
783753 static const int retry_delay_seconds = 2 ;
784754
@@ -790,12 +760,11 @@ static bool common_download_file_single_online(const std::string & url,
790760 }
791761 cli.set_default_headers (default_headers);
792762
793- common_file_metadata last;
794763 const bool file_exists = std::filesystem::exists (path);
764+
765+ std::string last_etag;
795766 if (file_exists) {
796- if (auto opt = read_metadata (metadata_path)) {
797- last = *opt;
798- }
767+ last_etag = read_etag (path);
799768 } else {
800769 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
801770 }
@@ -811,14 +780,9 @@ static bool common_download_file_single_online(const std::string & url,
811780 }
812781 }
813782
814- common_file_metadata current;
815- if (head_ok) {
816- if (head->has_header (" ETag" )) {
817- current.etag = head->get_header_value (" ETag" );
818- }
819- if (head->has_header (" Last-Modified" )) {
820- current.last_modified = head->get_header_value (" Last-Modified" );
821- }
783+ std::string etag;
784+ if (head_ok && head->has_header (" ETag" )) {
785+ etag = head->get_header_value (" ETag" );
822786 }
823787
824788 size_t total_size = 0 ;
@@ -836,16 +800,10 @@ static bool common_download_file_single_online(const std::string & url,
836800 }
837801
838802 bool should_download_from_scratch = false ;
839- if (head_ok) {
840- if (!last.etag .empty () && last.etag != current.etag ) {
841- LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
842- last.etag .c_str (), current.etag .c_str ());
843- should_download_from_scratch = true ;
844- } else if (!last.last_modified .empty () && last.last_modified != current.last_modified ) {
845- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
846- last.last_modified .c_str (), current.last_modified .c_str ());
847- should_download_from_scratch = true ;
848- }
803+ if (!last_etag.empty () && !etag.empty () && last_etag != etag) {
804+ LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
805+ last_etag.c_str (), etag.c_str ());
806+ should_download_from_scratch = true ;
849807 }
850808
851809 if (file_exists) {
@@ -873,9 +831,8 @@ static bool common_download_file_single_online(const std::string & url,
873831 }
874832
875833 // start the download
876- LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
877- __func__, show_masked_url (parts).c_str (), path_temporary.c_str (),
878- current.etag .c_str (), current.last_modified .c_str ());
834+ LOG_INF (" %s: trying to download model from %s to %s (etag:%s)...\n " ,
835+ __func__, show_masked_url (parts).c_str (), path_temporary.c_str (), etag.c_str ());
879836 const bool was_pull_successful = common_pull_file (cli, parts.path , path_temporary, supports_ranges, existing_size, total_size);
880837 if (!was_pull_successful) {
881838 if (i + 1 < max_attempts) {
@@ -885,15 +842,16 @@ static bool common_download_file_single_online(const std::string & url,
885842 } else {
886843 LOG_ERR (" %s: download failed after %d attempts\n " , __func__, max_attempts);
887844 }
888-
889845 continue ;
890846 }
891847
892848 if (std::rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
893849 LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
894850 return false ;
895851 }
896- write_metadata (metadata_path, url, current);
852+ if (!etag.empty ()) {
853+ write_etag (path, etag);
854+ }
897855 break ;
898856 }
899857
@@ -936,6 +894,26 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
936894 return { res->status , std::move (buf) };
937895}
938896
897+ #else // no httplib
898+
// Variant compiled when no HTTP backend is available: libcurl support is
// reported as absent.
bool common_has_curl() { return false; }
902+
903+ static bool common_download_file_single_online (const std::string &, const std::string &, const std::string &) {
904+ LOG_ERR (" error: built without CURL, cannot download model from internet\n " );
905+ return false ;
906+ }
907+
908+ std::pair<long , std::vector<char >> common_remote_get_content (const std::string & url, const common_remote_params &) {
909+ if (!url.empty ()) {
910+ throw std::runtime_error (" error: built without CURL, cannot download model from the internet" );
911+ }
912+
913+ return {};
914+ }
915+ #endif
916+
939917#endif // LLAMA_USE_CURL
940918
941919static bool common_download_file_single (const std::string & url,
0 commit comments