Skip to content

Commit 9f9934d

Browse files
committed
CDRIVER-2172 check idle socket with "ping"
Server Selection Spec update for single-threaded drivers. If a server is selected that has an existing connection that has been idle for socketCheckIntervalMS, the driver MUST check the connection with the "ping" command. If the ping succeeds, use the selected connection. If not, set the server's type to Unknown and update the Topology Description according to the Server Discovery and Monitoring Spec, and attempt once more to select a server.
1 parent 28ed980 commit 9f9934d

File tree

6 files changed

+147
-59
lines changed

6 files changed

+147
-59
lines changed

src/mongoc/mongoc-client.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2169,7 +2169,6 @@ mongoc_client_select_server (mongoc_client_t *client,
21692169
{
21702170
mongoc_ss_optype_t optype = for_writes ? MONGOC_SS_WRITE : MONGOC_SS_READ;
21712171
mongoc_server_description_t *sd;
2172-
bson_error_t tmp_err;
21732172

21742173
if (for_writes && prefs) {
21752174
bson_set_error (error,
@@ -2188,7 +2187,7 @@ mongoc_client_select_server (mongoc_client_t *client,
21882187
return NULL;
21892188
}
21902189

2191-
if (mongoc_cluster_check_interval (&client->cluster, sd->id, &tmp_err)) {
2190+
if (mongoc_cluster_check_interval (&client->cluster, sd->id)) {
21922191
/* check not required, or it succeeded */
21932192
return sd;
21942193
}
@@ -2200,7 +2199,6 @@ mongoc_client_select_server (mongoc_client_t *client,
22002199
return sd;
22012200
}
22022201

2203-
memcpy (error, &tmp_err, sizeof tmp_err);
22042202
return NULL;
22052203
}
22062204

src/mongoc/mongoc-cluster-private.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,7 @@ _mongoc_cluster_buffer_iovec (mongoc_iovec_t *iov,
102102

103103
bool
104104
mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
105-
uint32_t server_id,
106-
bson_error_t *error);
105+
uint32_t server_id);
107106

108107
bool
109108
mongoc_cluster_sendv_to_server (mongoc_cluster_t *cluster,

src/mongoc/mongoc-cluster.c

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,16 @@ _mongoc_cluster_stream_for_optype (mongoc_cluster_t *cluster,
19791979
RETURN (NULL);
19801980
}
19811981

1982+
if (!mongoc_cluster_check_interval (cluster, server_id)) {
1983+
/* Server Selection Spec: try once more */
1984+
server_id = mongoc_topology_select_server_id (
1985+
topology, optype, read_prefs, error);
1986+
1987+
if (!server_id) {
1988+
RETURN (NULL);
1989+
}
1990+
}
1991+
19821992
/* connect or reconnect to server if necessary */
19831993
server_stream = _mongoc_cluster_stream_for_server (
19841994
cluster, server_id, true /* reconnect_ok */, error);
@@ -2167,37 +2177,36 @@ mongoc_cluster_get_max_msg_size (mongoc_cluster_t *cluster)
21672177
*
21682178
* mongoc_cluster_check_interval --
21692179
*
2170-
* Server Discovery And Monitoring Spec:
2180+
* Server Selection Spec:
21712181
*
2172-
* "Only for single-threaded clients. If a server is selected that has an
2173-
* existing connection that has been idle for socketCheckIntervalMS, the
2174-
* driver MUST check it, by using it for an ismaster call. Whether
2175-
* ismaster succeeds or fails, the driver MUST update its topology
2176-
* description with the outcome. Then, if the check failed, the driver
2177-
* MUST retry server selection once."
2182+
* Only for single-threaded drivers.
2183+
*
2184+
* If a server is selected that has an existing connection that has been
2185+
* idle for socketCheckIntervalMS, the driver MUST check the connection
2186+
* with the "ping" command. If the ping succeeds, use the selected
2187+
* connection. If not, set the server's type to Unknown and update the
2188+
* Topology Description according to the Server Discovery and Monitoring
2189+
* Spec, and attempt once more to select a server.
21782190
*
21792191
* Returns:
21802192
* True if the check succeeded or no check was required, false if the
21812193
* check failed.
21822194
*
21832195
* Side effects:
2184-
* If a check is needed, updates the topology with its outcome.
2196+
* If a check fails, closes stream and may set server type Unknown.
21852197
*
21862198
*--------------------------------------------------------------------------
21872199
*/
21882200

21892201
bool
2190-
mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
2191-
uint32_t server_id,
2192-
bson_error_t *error)
2202+
mongoc_cluster_check_interval (mongoc_cluster_t *cluster, uint32_t server_id)
21932203
{
21942204
mongoc_topology_t *topology;
21952205
mongoc_topology_scanner_node_t *scanner_node;
21962206
mongoc_stream_t *stream;
21972207
int64_t now;
2198-
int64_t before_ismaster;
21992208
bson_t command;
2200-
bson_t reply;
2209+
bson_error_t error;
22012210
bool r = true;
22022211

22032212
topology = cluster->client->topology;
@@ -2226,41 +2235,30 @@ mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
22262235
if (scanner_node->last_used + (1000 * CHECK_CLOSED_DURATION_MSEC) < now) {
22272236
if (mongoc_stream_check_closed (stream)) {
22282237
mongoc_cluster_disconnect_node (cluster, server_id);
2229-
bson_set_error (error,
2230-
MONGOC_ERROR_STREAM,
2231-
MONGOC_ERROR_STREAM_SOCKET,
2232-
"Stream is closed");
22332238
return false;
22342239
}
22352240
}
22362241

22372242
if (scanner_node->last_used + (1000 * cluster->socketcheckintervalms) <
22382243
now) {
22392244
bson_init (&command);
2240-
BSON_APPEND_INT32 (&command, "ismaster", 1);
2241-
2242-
before_ismaster = now;
2245+
BSON_APPEND_INT32 (&command, "ping", 1);
22432246
r = mongoc_cluster_run_command_private (cluster,
22442247
stream,
22452248
server_id,
22462249
MONGOC_QUERY_SLAVE_OK,
22472250
"admin",
22482251
&command,
2249-
&reply,
2250-
error /* OUT */);
2251-
2252-
now = bson_get_monotonic_time ();
2252+
NULL,
2253+
&error /* OUT */);
22532254

22542255
bson_destroy (&command);
22552256

2256-
mongoc_topology_description_handle_ismaster (&topology->description,
2257-
server_id,
2258-
&reply,
2259-
(now - before_ismaster) /
2260-
1000, /* RTT_MS */
2261-
error /* IN */);
2262-
2263-
bson_destroy (&reply);
2257+
if (!r) {
2258+
mongoc_cluster_disconnect_node (cluster, server_id);
2259+
mongoc_topology_invalidate_server (
2260+
topology, server_id, &error /* IN */);
2261+
}
22642262
}
22652263

22662264
return r;

src/mongoc/mongoc-collection.c

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -353,30 +353,27 @@ mongoc_collection_aggregate (mongoc_collection_t *collection, /* IN */
353353
if (server_id) {
354354
/* will set slaveok bit if server is not mongos */
355355
mongoc_cursor_set_hint (cursor, server_id);
356-
} else {
357-
server_id =
358-
mongoc_topology_select_server_id (collection->client->topology,
359-
MONGOC_SS_READ,
360-
read_prefs,
356+
357+
/* server id isn't enough. ensure we're connected & know its wire version */
358+
server_stream =
359+
mongoc_cluster_stream_for_server (&collection->client->cluster,
360+
cursor->server_id,
361+
true /* reconnect ok */,
361362
&cursor->error);
362363

363-
if (!server_id) {
364+
if (!server_stream) {
364365
GOTO (done);
365366
}
367+
} else {
368+
server_stream = mongoc_cluster_stream_for_reads (
369+
&collection->client->cluster, read_prefs, &cursor->error);
366370

367-
/* don't use mongoc_cursor_set_hint, don't want special slaveok logic */
368-
cursor->server_id = server_id;
369-
}
370-
371-
/* server id isn't enough. ensure we're connected & know its wire version */
372-
server_stream =
373-
mongoc_cluster_stream_for_server (&collection->client->cluster,
374-
cursor->server_id,
375-
true /* reconnect ok */,
376-
&cursor->error);
371+
if (!server_stream) {
372+
GOTO (done);
373+
}
377374

378-
if (!server_stream) {
379-
GOTO (done);
375+
/* don't use mongoc_cursor_set_hint, don't want special slaveok logic */
376+
cursor->server_id = server_stream->sd->id;
380377
}
381378

382379
use_cursor = server_stream->sd->max_wire_version >= WIRE_VERSION_AGG_CURSOR;

src/mongoc/mongoc-server-description.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ mongoc_server_description_reset (mongoc_server_description_t *sd)
5757
/* set other fields to default or empty states. election_id is zeroed. */
5858
memset (
5959
&sd->set_name, 0, sizeof (*sd) - ((char *) &sd->set_name - (char *) sd));
60+
memset (&sd->error, 0, sizeof sd->error);
6061
sd->set_name = NULL;
6162
sd->type = MONGOC_SERVER_UNKNOWN;
6263

tests/test-mongoc-client.c

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,8 +2229,7 @@ test_mongoc_client_select_server_error_pooled (void)
22292229

22302230

22312231
/* CDRIVER-2172: in single mode, if the selected server has a socket that's been
2232-
* idle for socketCheckIntervalMS, check it with an ismaster call. If that
2233-
* fails, retry once.
2232+
* idle for socketCheckIntervalMS, check it with ping. If it fails, retry once.
22342233
*/
22352234
static void
22362235
_test_mongoc_client_select_server_retry (bool retry_succeeds)
@@ -2270,9 +2269,11 @@ _test_mongoc_client_select_server_retry (bool retry_succeeds)
22702269
/* let socketCheckIntervalMS pass */
22712270
_mongoc_usleep (100 * 1000);
22722271

2273-
/* second selection requires ismaster check, which fails */
2272+
/* second selection requires ping, which fails */
22742273
future = future_client_select_server (client, true, NULL, &error);
2275-
request = mock_server_receives_ismaster (server);
2274+
request = mock_server_receives_command (
2275+
server, "admin", MONGOC_QUERY_SLAVE_OK, "{'ping': 1}");
2276+
22762277
mock_server_hangs_up (request);
22772278
request_destroy (request);
22782279

@@ -2311,6 +2312,94 @@ test_mongoc_client_select_server_retry_fail (void)
23112312
}
23122313

23132314

2315+
2316+
/* CDRIVER-2172: in single mode, if the selected server has a socket that's been
2317+
* idle for socketCheckIntervalMS, check it with ping. If it fails, retry once.
2318+
*/
2319+
static void
2320+
_test_mongoc_client_fetch_stream_retry (bool retry_succeeds)
2321+
{
2322+
char *ismaster;
2323+
mock_server_t *server;
2324+
mongoc_uri_t *uri;
2325+
mongoc_client_t *client;
2326+
bson_error_t error;
2327+
request_t *request;
2328+
future_t *future;
2329+
2330+
server = mock_server_new ();
2331+
mock_server_run (server);
2332+
ismaster = bson_strdup_printf ("{'ok': 1, 'ismaster': true}");
2333+
uri = mongoc_uri_copy (mock_server_get_uri (server));
2334+
mongoc_uri_set_option_as_int32 (uri, "socketCheckIntervalMS", 50);
2335+
client = mongoc_client_new_from_uri (uri);
2336+
2337+
/* first time succeeds */
2338+
future = future_client_command_simple (
2339+
client, "db", tmp_bson ("{'cmd': 1}"), NULL, NULL, &error);
2340+
request = mock_server_receives_ismaster (server);
2341+
mock_server_replies_simple (request, ismaster);
2342+
request_destroy (request);
2343+
2344+
request = mock_server_receives_command (
2345+
server, "db", MONGOC_QUERY_SLAVE_OK, "{'cmd': 1}");
2346+
mock_server_replies_simple (request, "{'ok': 1}");
2347+
request_destroy (request);
2348+
2349+
ASSERT_OR_PRINT (future_get_bool (future), error);
2350+
future_destroy (future);
2351+
2352+
/* let socketCheckIntervalMS pass */
2353+
_mongoc_usleep (100 * 1000);
2354+
2355+
/* second selection requires ping, which fails */
2356+
future = future_client_command_simple (
2357+
client, "db", tmp_bson ("{'cmd': 1}"), NULL, NULL, &error);
2358+
2359+
request = mock_server_receives_command (
2360+
server, "admin", MONGOC_QUERY_SLAVE_OK, "{'ping': 1}");
2361+
2362+
mock_server_hangs_up (request);
2363+
request_destroy (request);
2364+
2365+
/* mongoc_client_select_server retries once */
2366+
request = mock_server_receives_ismaster (server);
2367+
if (retry_succeeds) {
2368+
mock_server_replies_simple (request, ismaster);
2369+
request_destroy (request);
2370+
2371+
request = mock_server_receives_command (
2372+
server, "db", MONGOC_QUERY_SLAVE_OK, "{'cmd': 1}");
2373+
2374+
mock_server_replies_simple (request, "{'ok': 1}");
2375+
ASSERT_OR_PRINT (future_get_bool (future), error);
2376+
} else {
2377+
mock_server_hangs_up (request);
2378+
BSON_ASSERT (!future_get_bool (future));
2379+
}
2380+
2381+
future_destroy (future);
2382+
request_destroy (request);
2383+
mongoc_client_destroy (client);
2384+
mongoc_uri_destroy (uri);
2385+
bson_free (ismaster);
2386+
mock_server_destroy (server);
2387+
}
2388+
2389+
2390+
static void
2391+
test_mongoc_client_fetch_stream_retry_succeed (void)
2392+
{
2393+
_test_mongoc_client_fetch_stream_retry (true);
2394+
}
2395+
2396+
static void
2397+
test_mongoc_client_fetch_stream_retry_fail (void)
2398+
{
2399+
_test_mongoc_client_fetch_stream_retry (false);
2400+
}
2401+
2402+
23142403
#if defined(MONGOC_ENABLE_SSL_OPENSSL) || \
23152404
defined(MONGOC_ENABLE_SSL_SECURE_TRANSPORT)
23162405
static bool
@@ -3100,6 +3189,12 @@ test_client_install (TestSuite *suite)
31003189
TestSuite_AddMockServerTest (suite,
31013190
"/Client/select_server/retry/fail",
31023191
test_mongoc_client_select_server_retry_fail);
3192+
TestSuite_AddMockServerTest (suite,
3193+
"/Client/fetch_stream/retry/succeed",
3194+
test_mongoc_client_fetch_stream_retry_succeed);
3195+
TestSuite_AddMockServerTest (suite,
3196+
"/Client/fetch_stream/retry/fail",
3197+
test_mongoc_client_fetch_stream_retry_fail);
31033198
TestSuite_AddFull (suite,
31043199
"/Client/null_error_pointer/single",
31053200
test_null_error_pointer_single,

0 commit comments

Comments
 (0)