@@ -217,6 +217,53 @@ struct common_hf_file_res {
217
217
std::string mmprojFile;
218
218
};
219
219
220
+ static void write_etag (const std::string & path, const std::string & etag) {
221
+ const std::string etag_path = path + " .etag" ;
222
+ write_file (etag_path, etag);
223
+ LOG_DBG (" %s: file etag saved: %s\n " , __func__, etag_path.c_str ());
224
+ }
225
+
226
+ static std::string read_etag (const std::string & path) {
227
+ std::string none;
228
+ const std::string etag_path = path + " .etag" ;
229
+
230
+ if (std::filesystem::exists (etag_path)) {
231
+ std::ifstream etag_in (etag_path);
232
+ if (!etag_in) {
233
+ LOG_ERR (" %s: could not open .etag file for reading: %s\n " , __func__, etag_path.c_str ());
234
+ return none;
235
+ }
236
+ std::string etag;
237
+ std::getline (etag_in, etag);
238
+ return etag;
239
+ }
240
+
241
+ // no etag file, but maybe there is an old .json
242
+ // remove this code later
243
+ const std::string metadata_path = path + " .json" ;
244
+
245
+ if (std::filesystem::exists (metadata_path)) {
246
+ std::ifstream metadata_in (metadata_path);
247
+ try {
248
+ nlohmann::json metadata_json;
249
+ metadata_in >> metadata_json;
250
+ LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
251
+ metadata_json.dump ().c_str ());
252
+ if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
253
+ std::string etag = metadata_json.at (" etag" );
254
+ write_etag (path, etag);
255
+ if (!std::filesystem::remove (metadata_path)) {
256
+ LOG_WRN (" %s: failed to delete old .json metadata file: %s\n " , __func__, metadata_path.c_str ());
257
+ }
258
+ return etag;
259
+ }
260
+ } catch (const nlohmann::json::exception & e) {
261
+ LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
262
+ }
263
+ }
264
+ return none;
265
+ }
266
+
220
267
#ifdef LLAMA_USE_CURL
221
268
222
269
bool common_has_curl () {
@@ -373,36 +420,15 @@ static bool common_download_head(CURL * curl,
373
420
static bool common_download_file_single_online (const std::string & url,
374
421
const std::string & path,
375
422
const std::string & bearer_token) {
376
- // If the file exists, check its JSON metadata companion file.
377
- std::string metadata_path = path + " .json" ;
378
423
static const int max_attempts = 3 ;
379
424
static const int retry_delay_seconds = 2 ;
380
425
for (int i = 0 ; i < max_attempts; ++i) {
381
- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
382
- std::string etag;
383
- std::string last_modified;
426
+ std::string etag;
384
427
385
428
// Check if the file already exists locally
386
429
const auto file_exists = std::filesystem::exists (path);
387
430
if (file_exists) {
388
- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
389
- std::ifstream metadata_in (metadata_path);
390
- if (metadata_in.good ()) {
391
- try {
392
- metadata_in >> metadata;
393
- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, metadata_path.c_str (),
394
- metadata.dump ().c_str ());
395
- if (metadata.contains (" etag" ) && metadata.at (" etag" ).is_string ()) {
396
- etag = metadata.at (" etag" );
397
- }
398
- if (metadata.contains (" lastModified" ) && metadata.at (" lastModified" ).is_string ()) {
399
- last_modified = metadata.at (" lastModified" );
400
- }
401
- } catch (const nlohmann::json::exception & e) {
402
- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, metadata_path.c_str (), e.what ());
403
- }
404
- }
405
- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
431
+ etag = read_etag (path);
406
432
} else {
407
433
LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
408
434
}
@@ -440,11 +466,6 @@ static bool common_download_file_single_online(const std::string & url,
440
466
headers.etag .c_str ());
441
467
should_download = true ;
442
468
should_download_from_scratch = true ;
443
- } else if (!last_modified.empty () && last_modified != headers.last_modified ) {
444
- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
445
- last_modified.c_str (), headers.last_modified .c_str ());
446
- should_download = true ;
447
- should_download_from_scratch = true ;
448
469
}
449
470
}
450
471
@@ -475,15 +496,9 @@ static bool common_download_file_single_online(const std::string & url,
475
496
}
476
497
}
477
498
}
478
-
479
- // Write the updated JSON metadata file.
480
- metadata.update ({
481
- { " url" , url },
482
- { " etag" , headers.etag },
483
- { " lastModified" , headers.last_modified }
484
- });
485
- write_file (metadata_path, metadata.dump (4 ));
486
- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, metadata_path.c_str ());
499
+ if (head_request_ok) {
500
+ write_etag (path, headers.etag );
501
+ }
487
502
488
503
// start the download
489
504
LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
@@ -664,51 +679,6 @@ static void print_progress(size_t current, size_t total) { // TODO isatty
664
679
std::cout.flush ();
665
680
}
666
681
667
- struct common_file_metadata {
668
- std::string etag;
669
- std::string last_modified;
670
- };
671
-
672
- static std::optional<common_file_metadata> read_metadata (const std::string & path) {
673
- if (!std::filesystem::exists (path)) {
674
- return std::nullopt ;
675
- }
676
-
677
- nlohmann::json metadata_json;
678
- common_file_metadata metadata;
679
-
680
- std::ifstream metadata_in (path);
681
- try {
682
- metadata_in >> metadata_json;
683
- LOG_DBG (" %s: previous metadata file found %s: %s\n " , __func__, path.c_str (),
684
- metadata_json.dump ().c_str ());
685
- if (metadata_json.contains (" etag" ) && metadata_json.at (" etag" ).is_string ()) {
686
- metadata.etag = metadata_json.at (" etag" );
687
- }
688
- if (metadata_json.contains (" lastModified" ) && metadata_json.at (" lastModified" ).is_string ()) {
689
- metadata.last_modified = metadata_json.at (" lastModified" );
690
- }
691
- } catch (const nlohmann::json::exception & e) {
692
- LOG_ERR (" %s: error reading metadata file %s: %s\n " , __func__, path.c_str (), e.what ());
693
- return std::nullopt ;
694
- }
695
-
696
- return metadata;
697
- }
698
-
699
- static void write_metadata (const std::string & path,
700
- const std::string & url,
701
- const common_file_metadata & metadata) {
702
- nlohmann::json metadata_json = {
703
- { " url" , url },
704
- { " etag" , metadata.etag },
705
- { " lastModified" , metadata.last_modified }
706
- };
707
-
708
- write_file (path, metadata_json.dump (4 ));
709
- LOG_DBG (" %s: file metadata saved: %s\n " , __func__, path.c_str ());
710
- }
711
-
712
682
static bool common_pull_file (httplib::Client & cli,
713
683
const std::string & resolve_path,
714
684
const std::string & path_tmp,
@@ -775,8 +745,6 @@ static bool common_pull_file(httplib::Client & cli,
775
745
static bool common_download_file_single_online (const std::string & url,
776
746
const std::string & path,
777
747
const std::string & bearer_token) {
778
- // If the file exists, check its JSON metadata companion file.
779
- std::string metadata_path = path + " .json" ;
780
748
static const int max_attempts = 3 ;
781
749
static const int retry_delay_seconds = 2 ;
782
750
@@ -788,12 +756,11 @@ static bool common_download_file_single_online(const std::string & url,
788
756
}
789
757
cli.set_default_headers (default_headers);
790
758
791
- common_file_metadata last;
792
759
const bool file_exists = std::filesystem::exists (path);
760
+
761
+ std::string last_etag;
793
762
if (file_exists) {
794
- if (auto opt = read_metadata (metadata_path)) {
795
- last = *opt;
796
- }
763
+ last_etag = read_etag (path);
797
764
} else {
798
765
LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
799
766
}
@@ -809,14 +776,9 @@ static bool common_download_file_single_online(const std::string & url,
809
776
}
810
777
}
811
778
812
- common_file_metadata current;
813
- if (head_ok) {
814
- if (head->has_header (" ETag" )) {
815
- current.etag = head->get_header_value (" ETag" );
816
- }
817
- if (head->has_header (" Last-Modified" )) {
818
- current.last_modified = head->get_header_value (" Last-Modified" );
819
- }
779
+ std::string etag;
780
+ if (head_ok && head->has_header (" ETag" )) {
781
+ etag = head->get_header_value (" ETag" );
820
782
}
821
783
822
784
size_t total_size = 0 ;
@@ -834,16 +796,10 @@ static bool common_download_file_single_online(const std::string & url,
834
796
}
835
797
836
798
bool should_download_from_scratch = false ;
837
- if (head_ok) {
838
- if (!last.etag .empty () && last.etag != current.etag ) {
839
- LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
840
- last.etag .c_str (), current.etag .c_str ());
841
- should_download_from_scratch = true ;
842
- } else if (!last.last_modified .empty () && last.last_modified != current.last_modified ) {
843
- LOG_WRN (" %s: Last-Modified header is different (%s != %s): triggering a new download\n " , __func__,
844
- last.last_modified .c_str (), current.last_modified .c_str ());
845
- should_download_from_scratch = true ;
846
- }
799
+ if (!last_etag.empty () && !etag.empty () && last_etag != etag) {
800
+ LOG_WRN (" %s: ETag header is different (%s != %s): triggering a new download\n " , __func__,
801
+ last_etag.c_str (), etag.c_str ());
802
+ should_download_from_scratch = true ;
847
803
}
848
804
849
805
if (file_exists) {
@@ -871,9 +827,8 @@ static bool common_download_file_single_online(const std::string & url,
871
827
}
872
828
873
829
// start the download
874
- LOG_INF (" %s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n " ,
875
- __func__, show_masked_url (parts).c_str (), path_temporary.c_str (),
876
- current.etag .c_str (), current.last_modified .c_str ());
830
+ LOG_INF (" %s: trying to download model from %s to %s (etag:%s)...\n " ,
831
+ __func__, show_masked_url (parts).c_str (), path_temporary.c_str (), etag.c_str ());
877
832
const bool was_pull_successful = common_pull_file (cli, parts.path , path_temporary, supports_ranges, existing_size, total_size);
878
833
if (!was_pull_successful) {
879
834
if (i + 1 < max_attempts) {
@@ -883,15 +838,16 @@ static bool common_download_file_single_online(const std::string & url,
883
838
} else {
884
839
LOG_ERR (" %s: download failed after %d attempts\n " , __func__, max_attempts);
885
840
}
886
-
887
841
continue ;
888
842
}
889
843
890
844
if (std::rename (path_temporary.c_str (), path.c_str ()) != 0 ) {
891
845
LOG_ERR (" %s: unable to rename file: %s to %s\n " , __func__, path_temporary.c_str (), path.c_str ());
892
846
return false ;
893
847
}
894
- write_metadata (metadata_path, url, current);
848
+ if (!etag.empty ()) {
849
+ write_etag (path, etag);
850
+ }
895
851
break ;
896
852
}
897
853
0 commit comments