@@ -219,6 +219,53 @@ struct common_hf_file_res {
219219 std::string mmprojFile;
220220};
221221
222+ static void write_etag (const std::string & path, const std::string & etag) {
223+ const std::string etag_path = path + " .etag" ;
224+ write_file (etag_path, etag);
225+ LOG_DBG (" %s: file etag saved: %s\n " , __func__, etag_path.c_str ());
226+ }
227+
228+ static std::string read_etag (const std::string & path) {
229+ std::string none;
230+ const std::string etag_path = path + " .etag" ;
231+
232+ if (std::filesystem::exists (etag_path)) {
233+ std::ifstream etag_in (etag_path);
234+ if (!etag_in) {
235+ LOG_ERR (" %s: could not open .etag file for reading: %s\n " , __func__, etag_path.c_str ());
236+ return none;
237+ }
238+ std::string etag;
239+ std::getline (etag_in, etag);
240+ return etag;
241+ }
242+
243+ // no etag file, but maybe there is an old .json
244+ // remove this code later
245+ const std::string metadata_path = path + " .json" ;
246+
247+ if (std::filesystem::exists (metadata_path)) {
248+ std::ifstream metadata_in (metadata_path);
249+ try {
250+ nlohmann::json metadata_json;
251+ metadata_in >> metadata_json;
252+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
253+ metadata_json.dump ().c_str ());
254+ if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
255+ std::string etag = metadata_json.at (" etag" );
256+ write_etag (path, etag);
257+ if (!std::filesystem::remove (metadata_path)) {
258+ LOG_WRN (" %s: failed to delete old .json metadata file: %s\n " , __func__, metadata_path.c_str ());
259+ }
260+ return etag;
261+ }
262+ } catch (const nlohmann::json::exception & e) {
263+ LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
264+ }
265+ }
266+ return none;
267+ }
268+
222269#ifdef LLAMA_USE_CURL
223270
224271bool common_has_curl () {
@@ -375,36 +422,15 @@ static bool common_download_head(CURL * curl,
375422static bool common_download_file_single_online (const std::string & url,
376423 const std::string & path,
377424 const std::string & bearer_token) {
378- // If the file exists, check its JSON metadata companion file.
379- std::string metadata_path = path + " .json" ;
380425 static const int max_attempts = 3 ;
381426 static const int retry_delay_seconds = 2 ;
382427 for (int i = 0 ; i < max_attempts; ++i) {
383- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
384- std::string etag;
385- std::string last_modified;
428+ std::string etag;
386429
387430 // Check if the file already exists locally
388431 const auto file_exists = std::filesystem::exists (path);
389432 if (file_exists) {
390- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
391- std::ifstream metadata_in (metadata_path);
392- if (metadata_in.good ()) {
393- try {
394- metadata_in >> metadata;
395- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
396- metadata.dump ().c_str ());
397- if (metadata.contains (" etag" ) && metadata.at (" etag" ).is_string ()) {
398- etag = metadata.at (" etag" );
399- }
400- if (metadata.contains (" lastModified" ) && metadata.at (" lastModified" ).is_string ()) {
401- last_modified = metadata.at (" lastModified" );
402- }
403- } catch (const nlohmann::json::exception & e) {
404- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
405- }
406- }
407- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
433+ etag = read_etag (path);
408434 } else {
409435 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
410436 }
@@ -442,11 +468,6 @@ static bool common_download_file_single_online(const std::string & url,
442468 headers.etag .c_str ());
443469 should_download = true ;
444470 should_download_from_scratch = true ;
445- } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
446- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
447- last_modified.c_str (), headers.last_modified .c_str ());
448- should_download = true ;
449- should_download_from_scratch = true ;
450471 }
451472 }
452473
@@ -477,15 +498,9 @@ static bool common_download_file_single_online(const std::string & url,
477498 }
478499 }
479500 }
480-
481- // Write the updated JSON metadata file.
482- metadata.update ({
483- { " url" , url },
484- { " etag" , headers.etag },
485- { " lastModified" , headers.last_modified }
486- });
487- write_file (metadata_path, metadata.dump (4 ));
488- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
501+ if (head_request_ok) {
502+ write_etag (path, headers.etag );
503+ }
489504
490505 // start the download
491506 LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
@@ -668,51 +683,6 @@ static void print_progress(size_t current, size_t total) { // TODO isatty
668683 std::cout.flush ();
669684}
670685
671- struct common_file_metadata {
672- std::string etag;
673- std::string last_modified;
674- };
675-
676- static std::optional<common_file_metadata> read_metadata (const std::string & path) {
677- if (!std::filesystem::exists (path)) {
678- return std::nullopt ;
679- }
680-
681- nlohmann::json metadata_json;
682- common_file_metadata metadata;
683-
684- std::ifstream metadata_in (path);
685- try {
686- metadata_in >> metadata_json;
687- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, path.c_str (),
688- metadata_json.dump ().c_str ());
689- if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
690- metadata.etag = metadata_json.at (" etag" );
691- }
692- if (metadata_json.contains (" lastModified" ) && metadata_json.at (" lastModified" ).is_string ()) {
693- metadata.last_modified = metadata_json.at (" lastModified" );
694- }
695- } catch (const nlohmann::json::exception & e) {
696- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, path.c_str (), e.what ());
697- return std::nullopt ;
698- }
699-
700- return metadata;
701- }
702-
703- static void write_metadata (const std::string & path,
704- const std::string & url,
705- const common_file_metadata & metadata) {
706- nlohmann::json metadata_json = {
707- { " url" , url },
708- { " etag" , metadata.etag },
709- { " lastModified" , metadata.last_modified }
710- };
711-
712- write_file (path, metadata_json.dump (4 ));
713- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, path.c_str ());
714- }
715-
716686static bool common_pull_file (httplib::Client & cli,
717687 const std::string & resolve_path,
718688 const std::string & path_tmp,
@@ -779,8 +749,6 @@ static bool common_pull_file(httplib::Client & cli,
779749static bool common_download_file_single_online (const std::string & url,
780750 const std::string & path,
781751 const std::string & bearer_token) {
782- // If the file exists, check its JSON metadata companion file.
783- std::string metadata_path = path + " .json" ;
784752 static const int max_attempts = 3 ;
785753 static const int retry_delay_seconds = 2 ;
786754
@@ -792,12 +760,11 @@ static bool common_download_file_single_online(const std::string & url,
792760 }
793761 cli.set_default_headers (default_headers);
794762
795- common_file_metadata last;
796763 const bool file_exists = std::filesystem::exists (path);
764+
765+ std::string last_etag;
797766 if (file_exists) {
798- if (auto opt = read_metadata (metadata_path)) {
799- last = *opt;
800- }
767+ last_etag = read_etag (path);
801768 } else {
802769 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
803770 }
@@ -813,14 +780,9 @@ static bool common_download_file_single_online(const std::string & url,
813780 }
814781 }
815782
816- common_file_metadata current;
817- if (head_ok) {
818- if (head->has_header (" ETag" )) {
819- current.etag = head->get_header_value (" ETag" );
820- }
821- if (head->has_header (" Last-Modified" )) {
822- current.last_modified = head->get_header_value (" Last-Modified" );
823- }
783+ std::string etag;
784+ if (head_ok && head->has_header (" ETag" )) {
785+ etag = head->get_header_value (" ETag" );
824786 }
825787
826788 size_t total_size = 0 ;
@@ -838,16 +800,10 @@ static bool common_download_file_single_online(const std::string & url,
838800 }
839801
840802 bool should_download_from_scratch = false ;
841- if (head_ok) {
842- if (!last.etag .empty () && last.etag != current.etag ) {
843- LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
844- last.etag .c_str (), current.etag .c_str ());
845- should_download_from_scratch = true ;
846- } else if (!last.last_modified .empty () && last.last_modified != current.last_modified ) {
847- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
848- last.last_modified .c_str (), current.last_modified .c_str ());
849- should_download_from_scratch = true ;
850- }
803+ if (!last_etag.empty () && !etag.empty () && last_etag != etag) {
804+ LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
805+ last_etag.c_str (), etag.c_str ());
806+ should_download_from_scratch = true ;
851807 }
852808
853809 if (file_exists) {
@@ -875,9 +831,8 @@ static bool common_download_file_single_online(const std::string & url,
875831 }
876832
877833 // start the download
878- LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
879- __func__, show_masked_url (parts).c_str (), path_temporary.c_str (),
880- current.etag .c_str (), current.last_modified .c_str ());
834+ LOG_INF (" %s: trying to download model from %s to %s (etag:%s)...\n " ,
835+ __func__, show_masked_url (parts).c_str (), path_temporary.c_str (), etag.c_str ());
881836 const bool was_pull_successful = common_pull_file (cli, parts.path , path_temporary, supports_ranges, existing_size, total_size);
882837 if (!was_pull_successful) {
883838 if (i + 1 < max_attempts) {
@@ -887,15 +842,16 @@ static bool common_download_file_single_online(const std::string & url,
887842 } else {
888843 LOG_ERR (" %s: download failed after %d attempts\n " , __func__, max_attempts);
889844 }
890-
891845 continue ;
892846 }
893847
894848 if (std::rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
895849 LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
896850 return false ;
897851 }
898- write_metadata (metadata_path, url, current);
852+ if (!etag.empty ()) {
853+ write_etag (path, etag);
854+ }
899855 break ;
900856 }
901857
0 commit comments