Skip to content

Commit e7df566

Browse files
author
kalibera
committed
Better handling of timeout of a file within simultaneous libcurl download,
reduces impact of concurrent transfers. git-svn-id: https://svn.r-project.org/R/trunk@87401 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 4059e8c commit e7df566

File tree

1 file changed

+90
-3
lines changed

1 file changed

+90
-3
lines changed

src/modules/internet/libcurl.c

Lines changed: 90 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ extern void Rsleep(double timeint);
5454
#endif
5555

5656
static int current_timeout = 0;
57+
/* Time stamp refreshed from currentTime() around the curl_multi_perform()
   calls of a multi-file (!single) download.  The progress_multi() and
   prereq_multi() callbacks read it as "now" when recording a transfer's
   start and when checking it against current_timeout. */
static double current_time = 0;
5758

5859
# if LIBCURL_VERSION_MAJOR < 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 28)
5960

@@ -195,6 +196,7 @@ static void download_report_url_error(CURLMsg *msg)
195196
const char *url, *strerr, *type;
196197
long status = 0;
197198
int *url_errs = NULL;
199+
int timedout = 0;
198200
curl_easy_getinfo(msg->easy_handle, CURLINFO_EFFECTIVE_URL, &url);
199201
curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE,
200202
&status);
@@ -214,7 +216,11 @@ static void download_report_url_error(CURLMsg *msg)
214216
url, type, status, strerr);
215217
} else {
216218
strerr = curl_easy_strerror(msg->data.result);
217-
if (streql(strerr, "Timeout was reached"))
219+
timedout = msg->data.result == CURLE_OPERATION_TIMEDOUT
220+
|| msg->data.result == CURLE_ABORTED_BY_CALLBACK
221+
|| streql(strerr, "Timeout was reached");
222+
223+
if (timedout)
218224
warning(_("URL '%s': Timeout of %d seconds was reached"),
219225
url, current_timeout);
220226
else
@@ -240,7 +246,8 @@ static int curlMultiCheckerrs(CURLM *mhnd)
240246
return retval;
241247
}
242248

243-
static void curlCommon(CURL *hnd, int redirect, int verify)
249+
static
250+
void curlCommon(CURL *hnd, int redirect, int verify)
244251
{
245252
const char *capath = getenv("CURL_CA_BUNDLE");
246253
if (verify) {
@@ -473,6 +480,7 @@ static winprogressbar pbar = {NULL, NULL, NULL};
473480
# define CURL_LEN double
474481
#endif
475482

483+
/* display a progress bar (used when downloading a single file) */
476484
static
477485
int progress(void *clientp, CURL_LEN dltotal, CURL_LEN dlnow,
478486
CURL_LEN ultotal, CURL_LEN ulnow)
@@ -533,6 +541,38 @@ int progress(void *clientp, CURL_LEN dltotal, CURL_LEN dlnow,
533541
return 0;
534542
# endif
535543
}
544+
545+
/* implement absolute-time timeout for the transfer time,
546+
used when downloading multiple files */
547+
static
548+
int progress_multi(void *clientp, CURL_LEN dltotal, CURL_LEN dlnow,
549+
CURL_LEN ultotal, CURL_LEN ulnow)
550+
{
551+
double *tstart = (double *) clientp;
552+
if (tstart) {
553+
if (*tstart == 0. && (dlnow > 0 || dltotal > 0))
554+
*tstart = current_time; /* record when transfer started */
555+
else if (*tstart > 0. && (current_time - *tstart) > current_timeout)
556+
return 1; /* abort transfer */
557+
}
558+
return 0;
559+
}
560+
561+
#if LIBCURL_VERSION_NUM >= 0x075000
/* Pre-request callback for simultaneous downloads, compiled only when
   libcurl is new enough (>= 7.80.0) to provide CURLOPT_PREREQFUNCTION.

   It records current_time as the start of the transfer in the double that
   clientp points to, so that the absolute-time timeout checked by
   progress_multi() also covers the period from sending the request up to
   receiving the first byte of the file, or information about file length.

   The connection address/port arguments are not used.  Always returns
   CURL_PREREQFUNC_OK, so the request itself is never rejected here. */
static
int prereq_multi(void *clientp, char *conn_primary_ip, char *conn_local_ip,
                 int conn_primary_port, int conn_local_port)
{
    double *tstart = (double *) clientp;

    /* Tolerate a missing data pointer, consistently with progress_multi(). */
    if (tstart)
        *tstart = current_time;
    return CURL_PREREQFUNC_OK;
}
#endif
536576
#endif // HAVE_LIBCURL
537577

538578
typedef struct {
@@ -541,6 +581,7 @@ typedef struct {
541581
int nurls;
542582
CURL ***hnd;
543583
FILE **out;
584+
double *tstart;
544585
SEXP sfile;
545586
int *errs;
546587
#ifdef Win32
@@ -700,7 +741,45 @@ static int download_add_url(int i, SEXP scmd, const char *mode,
700741
curl_easy_setopt(c->hnd[i], CURLOPT_PROGRESSFUNCTION, progress);
701742
curl_easy_setopt(c->hnd[i], CURLOPT_PROGRESSDATA, c->hnd[i]);
702743
#endif
703-
} else curl_easy_setopt(c->hnd[i], CURLOPT_NOPROGRESS, 1L);
744+
} else if (quiet && single) {
745+
curl_easy_setopt(c->hnd[i], CURLOPT_NOPROGRESS, 1L);
746+
} else {
747+
curl_easy_setopt(c->hnd[i], CURLOPT_NOPROGRESS, 0L);
748+
749+
/* Implement absolute-time timeout as a replacement for CURLOPT_TIMEOUT
750+
for simultaneous download. The goal is to keep all parts of the
751+
download protected by a timeout, while reducing the risk that
752+
implementation details of the simultaneous download (such as the
753+
number of concurrent connections, limit of connections to the same
754+
server, waiting on HTTP/2 multiplexing) would cause downloads to
755+
time out unpredictably. Apart from contention over network bandwidth,
756+
this should also reduce the risk that timeouts that worked with
757+
sequential downloads would not be sufficient. All within the scope
758+
of R's single timeout value for internet operations. This assumes
759+
curl connection timeout is in use. CURLOPT_TIMEOUT is not used
760+
because it includes also the time transfers are queued by curl.
761+
*/
762+
curl_easy_setopt(c->hnd[i], CURLOPT_TIMEOUT, 0L);
763+
c->tstart[i] = 0.;
764+
/* cover the time from when first data is received */
765+
// For libcurl >= 7.32.0 use CURLOPT_XFERINFOFUNCTION
766+
#if LIBCURL_VERSION_NUM >= 0x072000
767+
curl_easy_setopt(c->hnd[i], CURLOPT_XFERINFOFUNCTION, progress_multi);
768+
curl_easy_setopt(c->hnd[i], CURLOPT_XFERINFODATA, &c->tstart[i]);
769+
#else
770+
curl_easy_setopt(c->hnd[i], CURLOPT_PROGRESSFUNCTION, progress_multi);
771+
curl_easy_setopt(c->hnd[i], CURLOPT_PROGRESSDATA, &c->tstart[i]);
772+
#endif
773+
/* cover the time between a connection is established and first
774+
data is received */
775+
#if LIBCURL_VERSION_NUM >= 0x075000
776+
curl_easy_setopt(c->hnd[i], CURLOPT_PREREQFUNCTION, prereq_multi);
777+
curl_easy_setopt(c->hnd[i], CURLOPT_PREREQDATA, &c->tstart[i]);
778+
#elif LIBCURL_VERSION_NUM >= 0x070100
779+
curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, (long)current_timeout);
780+
curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
781+
#endif
782+
}
704783

705784
/* This would allow the negotiation of compressed HTTP transfers,
706785
but it is not clear it is always a good idea.
@@ -871,15 +950,18 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
871950
CURL **hnd[nurls];
872951
FILE *out[nurls];
873952
int errs[nurls];
953+
double tstart[nurls];
874954

875955
for(int i = 0; i < nurls; i++) {
876956
hnd[i] = NULL;
877957
out[i] = NULL;
878958
errs[i] = 0;
959+
tstart[i] = 0;
879960
}
880961
c.hnd = hnd;
881962
c.out = out;
882963
c.errs = errs;
964+
c.tstart = tstart;
883965

884966
int next_url = 0;
885967

@@ -906,6 +988,7 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
906988
curl_multi_setopt(mhnd, CURLMOPT_MAX_HOST_CONNECTIONS, 6L);
907989
#endif
908990

991+
if (!single) current_time = currentTime();
909992
if (download_add_one_url(&next_url, scmd, mode, quiet, single, &c)) {
910993
// no dest files could be opened, so bail out
911994
endcontext(&cntxt);
@@ -918,9 +1001,11 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
9181001
mode, quiet, single, &c);
9191002

9201003
R_Busy(1);
1004+
if (!single) current_time = currentTime();
9211005
// curl_multi_wait needs curl >= 7.28.0 .
9221006
curl_multi_perform(mhnd, &still_running);
9231007
do {
1008+
if (!single) current_time = currentTime();
9241009
int numfds;
9251010
CURLMcode mc = curl_multi_wait(mhnd, NULL, 0, 100, &numfds);
9261011
if (mc != CURLM_OK) { // internal, do not translate
@@ -936,6 +1021,7 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
9361021
if (repeats++ > 0) Rsleep(0.1); // do not block R process
9371022
} else repeats = 0;
9381023
R_ProcessEvents();
1024+
if (!single) current_time = currentTime();
9391025
curl_multi_perform(mhnd, &still_running);
9401026

9411027
if (!single)
@@ -952,6 +1038,7 @@ in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
9521038
download_try_add_urls(&next_url, MAX_CONCURRENT_URLS - still_running,
9531039
scmd, mode, quiet, single, &c);
9541040

1041+
if (!single) current_time = currentTime();
9551042
curl_multi_perform(mhnd, &still_running);
9561043
} while(still_running || next_url < nurls);
9571044
R_Busy(0);

0 commit comments

Comments
 (0)