Skip to content

Commit b4e3057

Browse files
committed
MB-42607 [2/2]: Handle SSL_accept returning SSL_ERROR_WANT_WRITE
During TLS handshake with the Data Service, if the node certificate requires more than 8192 bytes to transmit then the handshake can fail with the following error: WARNING 634: ERROR: SSL_accept returned -1 with error 3 INFO 634 Closing connection [ 1.2.3.4:55555 - 5.6.7.8:11207 (not authenticated) ] due to read error: Connection reset by peer This is caused by KV-Engine SSL handshake code failig to handle one of the possible temporary status codes from SSL_accept(), namely SSL_ERROR_WANT_WRITE which occurs when OpenSSL has consumed the BIO send buffer but still has more data it wishes to write. Given the BIO buffer size is 8192 bytes, if sending the node certificate requires more than 8192B then SSL_ERROR_WANT_WRITE is returned by OpenSSL. Node certificates which are in excess of 8kB - for example those which contain a large number of Subject Alternative Names (SANs) - can encounter this problem. Fix by checking for SSL_ERROR_WANT_WRITE being returned from SSL_accept(), and flushing the read/write BIOs and retrying. Change-Id: Ic248c47a1bb22f6de64263a3edeb85818d2fc35f Reviewed-on: http://review.couchbase.org/c/kv_engine/+/139826 Tested-by: Build Bot <[email protected]> Reviewed-by: Trond Norbye <[email protected]> Reviewed-by: James Harrison <[email protected]> Well-Formed: Build Bot <[email protected]>
1 parent 35c21b1 commit b4e3057

File tree

3 files changed

+55
-16
lines changed

3 files changed

+55
-16
lines changed

daemon/connection.cc

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -791,8 +791,48 @@ void Connection::logSslErrorInfo(const std::string& method, int rval) {
791791
}
792792
}
793793

794+
int Connection::sslAcceptWithRetry() {
795+
while (true) {
796+
int r = ssl.accept();
797+
if (r == 1) {
798+
// handshake completed.
799+
return r;
800+
}
801+
802+
auto sslError = ssl.getError(r);
803+
if (sslError == SSL_ERROR_WANT_READ ||
804+
sslError == SSL_ERROR_WANT_WRITE) {
805+
// Drain send and receive pipes.
806+
// Note: This is somewhat of a naive implementation - ideally we
807+
// would only drain the specific pipe direction based on the status
808+
// code, repeating any drains until there is no more data ready
809+
// to transfer. However that requires a more expressive interface on
810+
// drainBio{Send,Recv}Pipe. Given SSL_accept() only occurs once
811+
// per connect at the start, having a simpler (but technically
812+
// sub-optimal) handing of errors here seems reasonable.
813+
ssl.drainBioSendPipe(socketDescriptor);
814+
if (ssl.hasError()) {
815+
cb::net::set_econnreset();
816+
return -1;
817+
}
818+
ssl.drainBioRecvPipe(socketDescriptor);
819+
if (ssl.hasError()) {
820+
cb::net::set_econnreset();
821+
return -1;
822+
}
823+
// Continue SSL accept handshake.
824+
continue;
825+
} else {
826+
logSslErrorInfo("SSL_accept", r);
827+
cb::net::set_econnreset();
828+
return -1;
829+
}
830+
}
831+
folly::assume_unreachable();
832+
}
833+
794834
int Connection::sslPreConnection() {
795-
int r = ssl.accept();
835+
int r = sslAcceptWithRetry();
796836
if (r == 1) {
797837
ssl.drainBioSendPipe(socketDescriptor);
798838
ssl.setConnected();
@@ -841,22 +881,10 @@ int Connection::sslPreConnection() {
841881
return -1;
842882
}
843883

844-
LOG_INFO("{}: Using SSL cipher:{}",
845-
getId(),
846-
ssl.getCurrentCipherName());
847-
} else {
848-
if (ssl.getError(r) == SSL_ERROR_WANT_READ) {
849-
ssl.drainBioSendPipe(socketDescriptor);
850-
cb::net::set_ewouldblock();
851-
return -1;
852-
} else {
853-
logSslErrorInfo("SSL_accept", r);
854-
cb::net::set_econnreset();
855-
return -1;
856-
}
884+
LOG_INFO(
885+
"{}: Using SSL cipher:{}", getId(), ssl.getCurrentCipherName());
857886
}
858-
859-
return 0;
887+
return r;
860888
}
861889

862890
int Connection::recv(char* dest, size_t nbytes) {

daemon/connection.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,13 @@ class Connection : public dcp_message_producers {
10491049
*/
10501050
int sslPreConnection();
10511051

1052+
/**
1053+
* Helper method for sslPreConnection, performs SSL accept, retrying
1054+
* any temporary errors.
1055+
* @return
1056+
*/
1057+
int sslAcceptWithRetry();
1058+
10521059
// Shared DCP_DELETION write function for the v1/v2 commands.
10531060
ENGINE_ERROR_CODE deletionInner(const item_info& info,
10541061
cb::const_byte_buffer packet,

tests/testapp/testapp_tls.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ class TlsTests : public TestappClientTest {
2626
protected:
2727
void SetUp() override {
2828
TestappTest::SetUp();
29+
// MB-42607: Reduce BIO buffer size from default 8192 to 64 bytes, to
30+
// exercise SSL_ERROR_WANT_READ / SSL_ERROR_WANT_WRITE code paths.
31+
memcached_cfg["bio_drain_buffer_sz"] = 64;
32+
2933
memcached_cfg["ssl_cipher_list"]["tls 1.2"] = "HIGH";
3034
memcached_cfg["ssl_cipher_list"]["tls 1.3"] =
3135
"TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:TLS_AES_"

0 commit comments

Comments
 (0)