@@ -217,6 +217,53 @@ struct common_hf_file_res {
217217 std::string mmprojFile;
218218};
219219
220+ static void write_etag (const std::string & path, const std::string & etag) {
221+ const std::string etag_path = path + " .etag" ;
222+ write_file (etag_path, etag);
223+ LOG_DBG (" %s: file etag saved: %s\n " , __func__, etag_path.c_str ());
224+ }
225+
226+ static std::string read_etag (const std::string & path) {
227+ std::string none;
228+ const std::string etag_path = path + " .etag" ;
229+
230+ if (std::filesystem::exists (etag_path)) {
231+ std::ifstream etag_in (etag_path);
232+ if (!etag_in) {
233+ LOG_ERR (" %s: could not open .etag file for reading: %s\n " , __func__, etag_path.c_str ());
234+ return none;
235+ }
236+ std::string etag;
237+ std::getline (etag_in, etag);
238+ return etag;
239+ }
240+
241+ // no etag file, but maybe there is an old .json
242+ // remove this code later
243+ const std::string metadata_path = path + " .json" ;
244+
245+ if (std::filesystem::exists (metadata_path)) {
246+ std::ifstream metadata_in (metadata_path);
247+ try {
248+ nlohmann::json metadata_json;
249+ metadata_in >> metadata_json;
250+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
251+ metadata_json.dump ().c_str ());
252+ if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
253+ std::string etag = metadata_json.at (" etag" );
254+ write_etag (path, etag);
255+ if (!std::filesystem::remove (metadata_path)) {
256+ LOG_WRN (" %s: failed to delete old .json metadata file: %s\n " , __func__, metadata_path.c_str ());
257+ }
258+ return etag;
259+ }
260+ } catch (const nlohmann::json::exception & e) {
261+ LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
262+ }
263+ }
264+ return none;
265+ }
266+
220267#ifdef LLAMA_USE_CURL
221268
222269bool common_has_curl () {
@@ -373,36 +420,15 @@ static bool common_download_head(CURL * curl,
373420static bool common_download_file_single_online (const std::string & url,
374421 const std::string & path,
375422 const std::string & bearer_token) {
376- // If the file exists, check its JSON metadata companion file.
377- std::string metadata_path = path + " .json" ;
378423 static const int max_attempts = 3 ;
379424 static const int retry_delay_seconds = 2 ;
380425 for (int i = 0 ; i < max_attempts; ++i) {
381- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
382- std::string etag;
383- std::string last_modified;
426+ std::string etag;
384427
385428 // Check if the file already exists locally
386429 const auto file_exists = std::filesystem::exists (path);
387430 if (file_exists) {
388- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
389- std::ifstream metadata_in (metadata_path);
390- if (metadata_in.good ()) {
391- try {
392- metadata_in >> metadata;
393- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
394- metadata.dump ().c_str ());
395- if (metadata.contains (" etag" ) && metadata.at (" etag" ).is_string ()) {
396- etag = metadata.at (" etag" );
397- }
398- if (metadata.contains (" lastModified" ) && metadata.at (" lastModified" ).is_string ()) {
399- last_modified = metadata.at (" lastModified" );
400- }
401- } catch (const nlohmann::json::exception & e) {
402- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
403- }
404- }
405- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
431+ etag = read_etag (path);
406432 } else {
407433 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
408434 }
@@ -440,11 +466,6 @@ static bool common_download_file_single_online(const std::string & url,
440466 headers.etag .c_str ());
441467 should_download = true ;
442468 should_download_from_scratch = true ;
443- } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
444- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
445- last_modified.c_str (), headers.last_modified .c_str ());
446- should_download = true ;
447- should_download_from_scratch = true ;
448469 }
449470 }
450471
@@ -475,15 +496,9 @@ static bool common_download_file_single_online(const std::string & url,
475496 }
476497 }
477498 }
478-
479- // Write the updated JSON metadata file.
480- metadata.update ({
481- { " url" , url },
482- { " etag" , headers.etag },
483- { " lastModified" , headers.last_modified }
484- });
485- write_file (metadata_path, metadata.dump (4 ));
486- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
499+ if (head_request_ok) {
500+ write_etag (path, headers.etag );
501+ }
487502
488503 // start the download
489504 LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
@@ -664,51 +679,6 @@ static void print_progress(size_t current, size_t total) { // TODO isatty
664679 std::cout.flush ();
665680}
666681
667- struct common_file_metadata {
668- std::string etag;
669- std::string last_modified;
670- };
671-
672- static std::optional<common_file_metadata> read_metadata (const std::string & path) {
673- if (!std::filesystem::exists (path)) {
674- return std::nullopt ;
675- }
676-
677- nlohmann::json metadata_json;
678- common_file_metadata metadata;
679-
680- std::ifstream metadata_in (path);
681- try {
682- metadata_in >> metadata_json;
683- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, path.c_str (),
684- metadata_json.dump ().c_str ());
685- if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
686- metadata.etag = metadata_json.at (" etag" );
687- }
688- if (metadata_json.contains (" lastModified" ) && metadata_json.at (" lastModified" ).is_string ()) {
689- metadata.last_modified = metadata_json.at (" lastModified" );
690- }
691- } catch (const nlohmann::json::exception & e) {
692- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, path.c_str (), e.what ());
693- return std::nullopt ;
694- }
695-
696- return metadata;
697- }
698-
699- static void write_metadata (const std::string & path,
700- const std::string & url,
701- const common_file_metadata & metadata) {
702- nlohmann::json metadata_json = {
703- { " url" , url },
704- { " etag" , metadata.etag },
705- { " lastModified" , metadata.last_modified }
706- };
707-
708- write_file (path, metadata_json.dump (4 ));
709- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, path.c_str ());
710- }
711-
712682static bool common_pull_file (httplib::Client & cli,
713683 const std::string & resolve_path,
714684 const std::string & path_tmp,
@@ -775,8 +745,6 @@ static bool common_pull_file(httplib::Client & cli,
775745static bool common_download_file_single_online (const std::string & url,
776746 const std::string & path,
777747 const std::string & bearer_token) {
778- // If the file exists, check its JSON metadata companion file.
779- std::string metadata_path = path + " .json" ;
780748 static const int max_attempts = 3 ;
781749 static const int retry_delay_seconds = 2 ;
782750
@@ -788,12 +756,11 @@ static bool common_download_file_single_online(const std::string & url,
788756 }
789757 cli.set_default_headers (default_headers);
790758
791- common_file_metadata last;
792759 const bool file_exists = std::filesystem::exists (path);
760+
761+ std::string last_etag;
793762 if (file_exists) {
794- if (auto opt = read_metadata (metadata_path)) {
795- last = *opt;
796- }
763+ last_etag = read_etag (path);
797764 } else {
798765 LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
799766 }
@@ -809,14 +776,9 @@ static bool common_download_file_single_online(const std::string & url,
809776 }
810777 }
811778
812- common_file_metadata current;
813- if (head_ok) {
814- if (head->has_header (" ETag" )) {
815- current.etag = head->get_header_value (" ETag" );
816- }
817- if (head->has_header (" Last-Modified" )) {
818- current.last_modified = head->get_header_value (" Last-Modified" );
819- }
779+ std::string etag;
780+ if (head_ok && head->has_header (" ETag" )) {
781+ etag = head->get_header_value (" ETag" );
820782 }
821783
822784 size_t total_size = 0 ;
@@ -834,16 +796,10 @@ static bool common_download_file_single_online(const std::string & url,
834796 }
835797
836798 bool should_download_from_scratch = false ;
837- if (head_ok) {
838- if (!last.etag .empty () && last.etag != current.etag ) {
839- LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
840- last.etag .c_str (), current.etag .c_str ());
841- should_download_from_scratch = true ;
842- } else if (!last.last_modified .empty () && last.last_modified != current.last_modified ) {
843- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
844- last.last_modified .c_str (), current.last_modified .c_str ());
845- should_download_from_scratch = true ;
846- }
799+ if (!last_etag.empty () && !etag.empty () && last_etag != etag) {
800+ LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
801+ last_etag.c_str (), etag.c_str ());
802+ should_download_from_scratch = true ;
847803 }
848804
849805 if (file_exists) {
@@ -871,9 +827,8 @@ static bool common_download_file_single_online(const std::string & url,
871827 }
872828
873829 // start the download
874- LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
875- __func__, show_masked_url (parts).c_str (), path_temporary.c_str (),
876- current.etag .c_str (), current.last_modified .c_str ());
830+ LOG_INF (" %s: trying to download model from %s to %s (etag:%s)...\n " ,
831+ __func__, show_masked_url (parts).c_str (), path_temporary.c_str (), etag.c_str ());
877832 const bool was_pull_successful = common_pull_file (cli, parts.path , path_temporary, supports_ranges, existing_size, total_size);
878833 if (!was_pull_successful) {
879834 if (i + 1 < max_attempts) {
@@ -883,15 +838,16 @@ static bool common_download_file_single_online(const std::string & url,
883838 } else {
884839 LOG_ERR (" %s: download failed after %d attempts\n " , __func__, max_attempts);
885840 }
886-
887841 continue ;
888842 }
889843
890844 if (std::rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
891845 LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
892846 return false ;
893847 }
894- write_metadata (metadata_path, url, current);
848+ if (!etag.empty ()) {
849+ write_etag (path, etag);
850+ }
895851 break ;
896852 }
897853
0 commit comments