Skip to content

Commit 3f1a09b

Browse files
committed
CDRIVER-2172 robust mongoc_client_server_select
If a server is selected and its socket has not been used in 5 seconds, check if the socket is still good and retry once if not.
1 parent eb9b73c commit 3f1a09b

File tree

8 files changed

+212
-122
lines changed

8 files changed

+212
-122
lines changed

build/generate-future-functions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,13 @@
253253
[param("mongoc_client_ptr", "client"),
254254
param("bson_error_ptr", "error")]),
255255

256+
future_function("mongoc_server_description_ptr",
257+
"mongoc_client_select_server",
258+
[param("mongoc_client_ptr", "client"),
259+
param("bool", "for_writes"),
260+
param("const_mongoc_read_prefs_ptr", "prefs"),
261+
param("bson_error_ptr", "error")]),
262+
256263
future_function("bool",
257264
"mongoc_database_command_simple",
258265
[param("mongoc_database_ptr", "database"),

src/mongoc/mongoc-client.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2167,6 +2167,10 @@ mongoc_client_select_server (mongoc_client_t *client,
21672167
const mongoc_read_prefs_t *prefs,
21682168
bson_error_t *error)
21692169
{
2170+
mongoc_ss_optype_t optype = for_writes ? MONGOC_SS_WRITE : MONGOC_SS_READ;
2171+
mongoc_server_description_t *sd;
2172+
bson_error_t tmp_err;
2173+
21702174
if (for_writes && prefs) {
21712175
bson_set_error (error,
21722176
MONGOC_ERROR_SERVER_SELECTION,
@@ -2179,10 +2183,25 @@ mongoc_client_select_server (mongoc_client_t *client,
21792183
return NULL;
21802184
}
21812185

2182-
return mongoc_topology_select (client->topology,
2183-
for_writes ? MONGOC_SS_WRITE : MONGOC_SS_READ,
2184-
prefs,
2185-
error);
2186+
sd = mongoc_topology_select (client->topology, optype, prefs, error);
2187+
if (!sd) {
2188+
return NULL;
2189+
}
2190+
2191+
if (mongoc_cluster_check_interval (&client->cluster, sd->id, &tmp_err)) {
2192+
/* check not required, or it succeeded */
2193+
return sd;
2194+
}
2195+
2196+
/* check failed, retry once */
2197+
mongoc_server_description_destroy (sd);
2198+
sd = mongoc_topology_select (client->topology, optype, prefs, error);
2199+
if (sd) {
2200+
return sd;
2201+
}
2202+
2203+
memcpy (error, &tmp_err, sizeof tmp_err);
2204+
return NULL;
21862205
}
21872206

21882207
bool

src/mongoc/mongoc-cluster-private.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ _mongoc_cluster_buffer_iovec (mongoc_iovec_t *iov,
100100
int skip,
101101
char *buffer);
102102

103+
bool
104+
mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
105+
uint32_t server_id,
106+
bson_error_t *error);
107+
103108
bool
104109
mongoc_cluster_sendv_to_server (mongoc_cluster_t *cluster,
105110
mongoc_rpc_t *rpcs,

src/mongoc/mongoc-cluster.c

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,10 +2162,34 @@ mongoc_cluster_get_max_msg_size (mongoc_cluster_t *cluster)
21622162
}
21632163

21642164

2165-
static bool
2166-
_mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
2167-
uint32_t server_id,
2168-
bson_error_t *error)
2165+
/*
2166+
*--------------------------------------------------------------------------
2167+
*
2168+
* mongoc_cluster_check_interval --
2169+
*
2170+
* Server Discovery And Monitoring Spec:
2171+
*
2172+
* "Only for single-threaded clients. If a server is selected that has an
2173+
* existing connection that has been idle for socketCheckIntervalMS, the
2174+
* driver MUST check it, by using it for an ismaster call. Whether
2175+
* ismaster succeeds or fails, the driver MUST update its topology
2176+
* description with the outcome. Then, if the check failed, the driver
2177+
* MUST retry server selection once."
2178+
*
2179+
* Returns:
2180+
* True if the check succeeded or no check was required, false if the
2181+
* check failed.
2182+
*
2183+
* Side effects:
2184+
* If a check is needed, updates the topology with its outcome.
2185+
*
2186+
*--------------------------------------------------------------------------
2187+
*/
2188+
2189+
bool
2190+
mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
2191+
uint32_t server_id,
2192+
bson_error_t *error)
21692193
{
21702194
mongoc_topology_t *topology;
21712195
mongoc_topology_scanner_node_t *scanner_node;
@@ -2223,7 +2247,7 @@ _mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
22232247
"admin",
22242248
&command,
22252249
&reply,
2226-
error);
2250+
error /* OUT */);
22272251

22282252
now = bson_get_monotonic_time ();
22292253

@@ -2234,7 +2258,7 @@ _mongoc_cluster_check_interval (mongoc_cluster_t *cluster,
22342258
&reply,
22352259
(now - before_ismaster) /
22362260
1000, /* RTT_MS */
2237-
error);
2261+
error /* IN */);
22382262

22392263
bson_destroy (&reply);
22402264
}
@@ -2302,23 +2326,11 @@ mongoc_cluster_sendv_to_server (mongoc_cluster_t *cluster,
23022326
write_concern = cluster->client->write_concern;
23032327
}
23042328

2305-
if (!_mongoc_cluster_check_interval (
2306-
cluster, server_stream->sd->id, error)) {
2307-
GOTO (done);
2308-
}
2309-
23102329
_mongoc_array_clear (&cluster->iov);
23112330
#ifdef MONGOC_ENABLE_COMPRESSION
23122331
compressor_id = mongoc_server_description_compressor_id (server_stream->sd);
23132332
#endif
23142333

2315-
/*
2316-
* TODO: We can probably remove the need for sendv and just do send since
2317-
* we support write concerns now. Also, we clobber our getlasterror on
2318-
* each subsequent mutation. It's okay, since it comes out correct anyway,
2319-
* just useless work (and technically the request_id changes).
2320-
*/
2321-
23222334
need_gle = _mongoc_rpc_needs_gle (rpc, write_concern);
23232335
_mongoc_cluster_inc_egress_rpc (rpc);
23242336
_mongoc_rpc_gather (rpc, &cluster->iov);

tests/mock_server/future-functions.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,28 @@ background_mongoc_client_get_database_names (void *data)
456456
return NULL;
457457
}
458458

459+
static void *
460+
background_mongoc_client_select_server (void *data)
461+
{
462+
future_t *future = (future_t *) data;
463+
future_value_t return_value;
464+
465+
return_value.type = future_value_mongoc_server_description_ptr_type;
466+
467+
future_value_set_mongoc_server_description_ptr (
468+
&return_value,
469+
mongoc_client_select_server (
470+
future_value_get_mongoc_client_ptr (future_get_param (future, 0)),
471+
future_value_get_bool (future_get_param (future, 1)),
472+
future_value_get_const_mongoc_read_prefs_ptr (future_get_param (future, 2)),
473+
future_value_get_bson_error_ptr (future_get_param (future, 3))
474+
));
475+
476+
future_resolve (future, return_value);
477+
478+
return NULL;
479+
}
480+
459481
static void *
460482
background_mongoc_database_command_simple (void *data)
461483
{
@@ -1240,6 +1262,32 @@ future_client_get_database_names (
12401262
return future;
12411263
}
12421264

1265+
future_t *
1266+
future_client_select_server (
1267+
mongoc_client_ptr client,
1268+
bool for_writes,
1269+
const_mongoc_read_prefs_ptr prefs,
1270+
bson_error_ptr error)
1271+
{
1272+
future_t *future = future_new (future_value_mongoc_server_description_ptr_type,
1273+
4);
1274+
1275+
future_value_set_mongoc_client_ptr (
1276+
future_get_param (future, 0), client);
1277+
1278+
future_value_set_bool (
1279+
future_get_param (future, 1), for_writes);
1280+
1281+
future_value_set_const_mongoc_read_prefs_ptr (
1282+
future_get_param (future, 2), prefs);
1283+
1284+
future_value_set_bson_error_ptr (
1285+
future_get_param (future, 3), error);
1286+
1287+
future_start (future, background_mongoc_client_select_server);
1288+
return future;
1289+
}
1290+
12431291
future_t *
12441292
future_database_command_simple (
12451293
mongoc_database_ptr database,

tests/mock_server/future-functions.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,16 @@ future_client_get_database_names (
223223
);
224224

225225

226+
future_t *
227+
future_client_select_server (
228+
229+
mongoc_client_ptr client,
230+
bool for_writes,
231+
const_mongoc_read_prefs_ptr prefs,
232+
bson_error_ptr error
233+
);
234+
235+
226236
future_t *
227237
future_database_command_simple (
228238

tests/test-mongoc-client.c

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,6 +2228,89 @@ test_mongoc_client_select_server_error_pooled (void)
22282228
}
22292229

22302230

2231+
/* CDRIVER-2172: in single mode, if the selected server has a socket that's been
2232+
* idle for socketCheckIntervalMS, check it with an ismaster call. If that
2233+
* fails, retry once.
2234+
*/
2235+
static void
2236+
_test_mongoc_client_select_server_retry (bool retry_succeeds)
2237+
{
2238+
char *ismaster;
2239+
mock_server_t *server;
2240+
mongoc_uri_t *uri;
2241+
mongoc_client_t *client;
2242+
bson_error_t error;
2243+
request_t *request;
2244+
future_t *future;
2245+
mongoc_server_description_t *sd;
2246+
2247+
server = mock_server_new ();
2248+
mock_server_run (server);
2249+
ismaster = bson_strdup_printf ("{'ok': 1, 'ismaster': true,"
2250+
" 'secondary': false,"
2251+
" 'setName': 'rs', 'hosts': ['%s']}",
2252+
mock_server_get_host_and_port (server));
2253+
2254+
uri = mongoc_uri_copy (mock_server_get_uri (server));
2255+
mongoc_uri_set_option_as_utf8 (uri, "replicaSet", "rs");
2256+
mongoc_uri_set_option_as_int32 (uri, "socketCheckIntervalMS", 50);
2257+
client = mongoc_client_new_from_uri (uri);
2258+
2259+
/* first selection succeeds */
2260+
future = future_client_select_server (client, true, NULL, &error);
2261+
request = mock_server_receives_ismaster (server);
2262+
mock_server_replies_simple (request, ismaster);
2263+
request_destroy (request);
2264+
sd = future_get_mongoc_server_description_ptr (future);
2265+
ASSERT_OR_PRINT (sd, error);
2266+
2267+
future_destroy (future);
2268+
mongoc_server_description_destroy (sd);
2269+
2270+
/* let socketCheckIntervalMS pass */
2271+
_mongoc_usleep (100 * 1000);
2272+
2273+
/* second selection requires ismaster check, which fails */
2274+
future = future_client_select_server (client, true, NULL, &error);
2275+
request = mock_server_receives_ismaster (server);
2276+
mock_server_hangs_up (request);
2277+
request_destroy (request);
2278+
2279+
/* mongoc_client_select_server retries once */
2280+
request = mock_server_receives_ismaster (server);
2281+
if (retry_succeeds) {
2282+
mock_server_replies_simple (request, ismaster);
2283+
sd = future_get_mongoc_server_description_ptr (future);
2284+
ASSERT_OR_PRINT (sd, error);
2285+
mongoc_server_description_destroy (sd);
2286+
} else {
2287+
mock_server_hangs_up (request);
2288+
sd = future_get_mongoc_server_description_ptr (future);
2289+
BSON_ASSERT (sd == NULL);
2290+
}
2291+
2292+
future_destroy (future);
2293+
request_destroy (request);
2294+
mongoc_client_destroy (client);
2295+
mongoc_uri_destroy (uri);
2296+
bson_free (ismaster);
2297+
mock_server_destroy (server);
2298+
}
2299+
2300+
2301+
static void
2302+
test_mongoc_client_select_server_retry_succeed (void)
2303+
{
2304+
_test_mongoc_client_select_server_retry (true);
2305+
}
2306+
2307+
static void
2308+
test_mongoc_client_select_server_retry_fail (void)
2309+
{
2310+
_test_mongoc_client_select_server_retry (false);
2311+
}
2312+
2313+
22312314
#if defined(MONGOC_ENABLE_SSL_OPENSSL) || \
22322315
defined(MONGOC_ENABLE_SSL_SECURE_TRANSPORT)
22332316
static bool
@@ -3011,6 +3094,12 @@ test_client_install (TestSuite *suite)
30113094
TestSuite_AddLive (suite,
30123095
"/Client/select_server/err/pooled",
30133096
test_mongoc_client_select_server_error_pooled);
3097+
TestSuite_AddMockServerTest (suite,
3098+
"/Client/select_server/retry/succeed",
3099+
test_mongoc_client_select_server_retry_succeed);
3100+
TestSuite_AddMockServerTest (suite,
3101+
"/Client/select_server/retry/fail",
3102+
test_mongoc_client_select_server_retry_fail);
30143103
TestSuite_AddFull (suite,
30153104
"/Client/null_error_pointer/single",
30163105
test_null_error_pointer_single,

0 commit comments

Comments
 (0)