Skip to content

Commit 0b0fb49

Browse files
committed
Implement resumable downloads in common_download_file_single function
- Add detection of partial download files (.downloadInProgress)
- Check server support for HTTP Range requests via the Accept-Ranges header
- Implement HTTP Range requests with a "bytes=<start>-" header
- Open files in append mode when resuming, and in create mode for new downloads
- Maintain backwards compatibility with existing functionality

Signed-off-by: Eric Curtin <[email protected]>
1 parent d166c42 commit 0b0fb49

File tree

1 file changed

+36
-4
lines changed

1 file changed

+36
-4
lines changed

common/arg.cpp

Lines changed: 36 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
289289
struct common_load_model_from_url_headers {
290290
std::string etag;
291291
std::string last_modified;
292+
std::string accept_ranges;
292293
};
293294

294295
common_load_model_from_url_headers headers;
@@ -328,6 +329,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
328329
static std::regex header_regex("([^:]+): (.*)\r\n");
329330
static std::regex etag_regex("ETag", std::regex_constants::icase);
330331
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
332+
static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
331333

332334
std::string header(buffer, n_items);
333335
std::smatch match;
@@ -338,6 +340,8 @@ static bool common_download_file_single(const std::string & url, const std::stri
338340
headers->etag = value;
339341
} else if (std::regex_match(key, match, last_modified_regex)) {
340342
headers->last_modified = value;
343+
} else if (std::regex_match(key, match, accept_ranges_regex)) {
344+
headers->accept_ranges = value;
341345
}
342346
}
343347
return n_items;
@@ -380,7 +384,22 @@ static bool common_download_file_single(const std::string & url, const std::stri
380384

381385
if (should_download) {
382386
std::string path_temporary = path + ".downloadInProgress";
383-
if (file_exists) {
387+
long partial_size = 0; // Check if partial download exists and get its size
388+
if (std::filesystem::exists(path_temporary)) {
389+
partial_size = static_cast<long>(std::filesystem::file_size(path_temporary));
390+
LOG_INF("%s: found partial download: %s (%ld bytes)\n", __func__, path_temporary.c_str(), partial_size);
391+
if (head_request_ok) { // Check if server supports range requests
392+
bool server_supports_ranges = (headers.accept_ranges == "bytes");
393+
if (server_supports_ranges && partial_size > 0) {
394+
LOG_INF("%s: server supports range requests, resuming download from byte %ld\n", __func__, partial_size);
395+
}
396+
else {
397+
partial_size = 0;
398+
}
399+
}
400+
}
401+
402+
if (file_exists && !partial_size) {
384403
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
385404
if (remove(path.c_str()) != 0) {
386405
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
@@ -396,7 +415,9 @@ static bool common_download_file_single(const std::string & url, const std::stri
396415
}
397416
};
398417

399-
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
418+
// Open file in append mode if resuming, otherwise create new file
419+
const char * mode = partial_size ? "ab" : "wb";
420+
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), mode));
400421
if (!outfile) {
401422
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
402423
return false;
@@ -406,9 +427,15 @@ static bool common_download_file_single(const std::string & url, const std::stri
406427
auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
407428
return fwrite(data, size, nmemb, (FILE *)fd);
408429
};
430+
409431
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
410432
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
411433
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
434+
if (partial_size) { // Add Range header if resuming
435+
std::string range_header = "Range: bytes=" + std::to_string(partial_size) + "-";
436+
http_headers.ptr = curl_slist_append(http_headers.ptr, range_header.c_str());
437+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
438+
}
412439

413440
// display download progress
414441
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
@@ -429,8 +456,13 @@ static bool common_download_file_single(const std::string & url, const std::stri
429456
};
430457

431458
// start the download
432-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
433-
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
459+
if (partial_size) {
460+
LOG_INF("%s: resuming download from %s to %s from byte %ld (server_etag:%s, server_last_modified:%s)...\n", __func__,
461+
llama_download_hide_password_in_url(url).c_str(), path.c_str(), partial_size, headers.etag.c_str(), headers.last_modified.c_str());
462+
} else {
463+
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
464+
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
465+
}
434466

435467
// Write the updated JSON metadata file.
436468
metadata.update({

0 commit comments

Comments
 (0)