Skip to content

Commit 4ebba3d

Browse files
committed
CDRIVER-532 - Coalesce small vectorized TLS writes
Rather than calling BIO_write multiple times for each iovec_t in a call to mongoc_stream_tls_writev(), buffer them and send them all at once. Attempt to "do the right thing"™ when buffering provides no performance benefit.
1 parent 8f61df9 commit 4ebba3d

File tree

2 files changed

+141
-35
lines changed

2 files changed

+141
-35
lines changed

src/mongoc/mongoc-stream-tls.c

Lines changed: 128 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
#undef MONGOC_LOG_DOMAIN
4343
#define MONGOC_LOG_DOMAIN "stream-tls"
4444

45+
#define MONGOC_STREAM_TLS_BUFFER_SIZE 4096
46+
4547

4648
/**
4749
* mongoc_stream_tls_t:
@@ -448,6 +450,53 @@ _mongoc_stream_tls_flush (mongoc_stream_t *stream)
448450
}
449451

450452

453+
static ssize_t
454+
_mongoc_stream_tls_write (mongoc_stream_tls_t *tls,
455+
char *buf,
456+
size_t buf_len)
457+
{
458+
ssize_t ret;
459+
460+
int64_t now;
461+
int64_t expire = 0;
462+
463+
BSON_ASSERT (tls);
464+
BSON_ASSERT (buf);
465+
BSON_ASSERT (buf_len);
466+
467+
if (tls->timeout_msec >= 0) {
468+
expire = bson_get_monotonic_time () + (tls->timeout_msec * 1000UL);
469+
}
470+
471+
ret = BIO_write (tls->bio, buf, buf_len);
472+
473+
if (ret < 0) {
474+
return ret;
475+
}
476+
477+
if (expire) {
478+
now = bson_get_monotonic_time ();
479+
480+
if ((expire - now) < 0) {
481+
if (ret < buf_len) {
482+
mongoc_counter_streams_timeout_inc();
483+
#ifdef _WIN32
484+
errno = WSAETIMEDOUT;
485+
#else
486+
errno = ETIMEDOUT;
487+
#endif
488+
}
489+
490+
tls->timeout_msec = 0;
491+
} else {
492+
tls->timeout_msec = (expire - now) / 1000L;
493+
}
494+
}
495+
496+
return ret;
497+
}
498+
499+
451500
/*
452501
*--------------------------------------------------------------------------
453502
*
@@ -473,62 +522,112 @@ _mongoc_stream_tls_writev (mongoc_stream_t *stream,
473522
int32_t timeout_msec)
474523
{
475524
mongoc_stream_tls_t *tls = (mongoc_stream_tls_t *)stream;
525+
char buf[MONGOC_STREAM_TLS_BUFFER_SIZE];
526+
476527
ssize_t ret = 0;
528+
ssize_t child_ret;
477529
size_t i;
478530
size_t iov_pos = 0;
479-
int write_ret;
480531

481-
int64_t now;
482-
int64_t expire = 0;
532+
/* There's a bit of a dance to coalesce vectorized writes into
533+
* MONGOC_STREAM_TLS_BUFFER_SIZE'd writes to avoid lots of small tls
534+
* packets.
535+
*
536+
* The basic idea is that we want to combine writes in the buffer if they're
537+
* smaller than the buffer, flushing as it gets full. For larger writes, or
538+
* the last write in the iovec array, we want to ignore the buffer and just
539+
* write immediately. We take care of doing buffer writes by re-invoking
540+
* ourself with a single iovec_t, pointing at our stack buffer.
541+
*/
542+
char *buf_head = buf;
543+
char *buf_tail = buf;
544+
char *buf_end = buf + MONGOC_STREAM_TLS_BUFFER_SIZE;
545+
size_t bytes;
546+
547+
char *to_write = NULL;
548+
size_t to_write_len;
483549

484550
BSON_ASSERT (tls);
485551
BSON_ASSERT (iov);
486552
BSON_ASSERT (iovcnt);
487553

488554
tls->timeout_msec = timeout_msec;
489555

490-
if (timeout_msec >= 0) {
491-
expire = bson_get_monotonic_time () + (timeout_msec * 1000UL);
492-
}
493-
494556
for (i = 0; i < iovcnt; i++) {
495557
iov_pos = 0;
496558

497559
while (iov_pos < iov[i].iov_len) {
498-
write_ret = BIO_write (tls->bio, (char *)iov[i].iov_base + iov_pos,
499-
(int)(iov[i].iov_len - iov_pos));
560+
if (buf_head != buf_tail ||
561+
((i + 1 < iovcnt) &&
562+
((buf_end - buf_tail) > (iov[i].iov_len - iov_pos)))) {
563+
/* If we have either of:
564+
* - buffered bytes already
565+
* - another iovec to send after this one and we don't have more
566+
* bytes to send than the size of the buffer.
567+
*
568+
* copy into the buffer */
569+
570+
bytes = BSON_MIN (iov[i].iov_len - iov_pos, buf_end - buf_tail);
571+
572+
memcpy (buf_tail, iov[i].iov_base + iov_pos, bytes);
573+
buf_tail += bytes;
574+
iov_pos += bytes;
575+
576+
if (buf_tail == buf_end) {
577+
/* If we're full, request send */
578+
579+
to_write = buf_head;
580+
to_write_len = buf_tail - buf_head;
581+
582+
buf_tail = buf_head = buf;
583+
}
584+
} else {
585+
/* Didn't buffer, so just write it through */
586+
587+
to_write = (char *)iov[i].iov_base + iov_pos;
588+
to_write_len = iov[i].iov_len - iov_pos;
500589

501-
if (write_ret < 0) {
502-
return write_ret;
590+
iov_pos += to_write_len;
503591
}
504592

505-
if (expire) {
506-
now = bson_get_monotonic_time ();
593+
if (to_write) {
594+
/* We get here if we buffered some bytes and filled the buffer, or
595+
* if we didn't buffer and have to send out of the iovec */
507596

508-
if ((expire - now) < 0) {
509-
if (write_ret == 0) {
510-
mongoc_counter_streams_timeout_inc();
511-
#ifdef _WIN32
512-
errno = WSAETIMEDOUT;
513-
#else
514-
errno = ETIMEDOUT;
515-
#endif
516-
return -1;
517-
}
597+
child_ret = _mongoc_stream_tls_write (tls, to_write, to_write_len);
518598

519-
tls->timeout_msec = 0;
520-
} else {
521-
tls->timeout_msec = (expire - now) / 1000L;
599+
if (child_ret < 0) {
600+
/* Buffer write failed, just return the error */
601+
return child_ret;
602+
}
603+
604+
ret += child_ret;
605+
606+
if (child_ret < to_write_len) {
607+
/* we timed out, so send back what we could send */
608+
609+
return ret;
522610
}
611+
612+
to_write = NULL;
523613
}
614+
}
615+
}
616+
617+
if (buf_head != buf_tail) {
618+
/* If we have any bytes buffered, send */
524619

525-
ret += write_ret;
526-
iov_pos += write_ret;
620+
child_ret = _mongoc_stream_tls_write (tls, buf_head, buf_tail - buf_head);
621+
622+
if (child_ret < 0) {
623+
return child_ret;
527624
}
625+
626+
ret += child_ret;
528627
}
529628

530629
if (ret >= 0) {
531-
mongoc_counter_streams_egress_add(ret);
630+
mongoc_counter_streams_egress_add (ret);
532631
}
533632

534633
return ret;

tests/ssl-test.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
#define TIMEOUT 1000
1111

12+
#define NUM_IOVECS 2000
13+
1214
#define LOCALHOST "127.0.0.1"
1315

1416
typedef struct ssl_test_data
@@ -45,7 +47,7 @@ ssl_test_server (void * ptr)
4547
mongoc_socket_t *listen_sock;
4648
mongoc_socket_t *conn_sock;
4749
socklen_t sock_len;
48-
char buf[1024];
50+
char buf[4 * NUM_IOVECS];
4951
ssize_t r;
5052
mongoc_iovec_t iov;
5153
struct sockaddr_in server_addr = { 0 };
@@ -166,11 +168,13 @@ ssl_test_client (void * ptr)
166168
mongoc_stream_t *sock_stream;
167169
mongoc_stream_t *ssl_stream;
168170
mongoc_socket_t *conn_sock;
171+
int i;
169172
int errno_captured;
170173
char buf[1024];
171174
ssize_t r;
172175
mongoc_iovec_t riov;
173176
mongoc_iovec_t wiov;
177+
mongoc_iovec_t wiov_many[NUM_IOVECS];
174178
struct sockaddr_in server_addr = { 0 };
175179
int len;
176180

@@ -237,18 +241,21 @@ ssl_test_client (void * ptr)
237241
return NULL;
238242
}
239243

240-
len = 4;
244+
len = 4 * NUM_IOVECS;
241245

242246
wiov.iov_base = (void *)&len;
243247
wiov.iov_len = 4;
244248
r = mongoc_stream_writev(ssl_stream, &wiov, 1, TIMEOUT);
245249

246250
assert(r == wiov.iov_len);
247251

248-
wiov.iov_base = "foo";
249-
wiov.iov_len = 4;
250-
r = mongoc_stream_writev(ssl_stream, &wiov, 1, TIMEOUT);
251-
assert(r == wiov.iov_len);
252+
for (i = 0; i < NUM_IOVECS; i++) {
253+
wiov_many[i].iov_base = "foo";
254+
wiov_many[i].iov_len = 4;
255+
}
256+
257+
r = mongoc_stream_writev(ssl_stream, wiov_many, NUM_IOVECS, TIMEOUT);
258+
assert(r == wiov_many[0].iov_len * NUM_IOVECS);
252259

253260
riov.iov_len = 1;
254261

0 commit comments

Comments
 (0)