Skip to content

Commit d261727

Browse files
alcaeus and kevinAlbs authored
CDRIVER-3997 Use "hello" command for monitoring if supported (#797)
* Handle helloOk flag in server description
* Refactor handshake command naming
* Use hello command after seeing helloOk in legacy hello response
* Fix 5.0 test
* Update SDAM spec tests to latest version
* Reset hello_ok in mongoc_server_description_reset only
* Don't update initial server description in server monitor
* Always build legacy_hello_cmd with legacy hello command
* Test fallback to legacy hello after connection is closed
* Update wrong comment
  Co-authored-by: Kevin Albertson <[email protected]>
* Don't modify legacy_hello_cmd when using versioned API
* Lock topology mutex before obtaining server description
* Don't update hello_ok when declaring API version
  This is not necessary since we don't rely on hello_ok if an API version was declared.
  This partially reverts commit 7d58f04.
* Remove early return when reconciling scanner nodes

Co-authored-by: Kevin Albertson <[email protected]>
1 parent 3063bcf commit d261727

File tree

299 files changed

+15335
-419
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

299 files changed

+15335
-419
lines changed

src/libmongoc/src/mongoc/mongoc-server-description-private.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ struct _mongoc_server_description_t {
6464
int64_t last_update_time_usec;
6565
bson_t last_hello_response;
6666
bool has_hello_response;
67+
bool hello_ok;
6768
const char *connection_address;
6869
/* SDAM dictates storing me/hosts/passives/arbiters after being "normalized
6970
* to lower-case" Instead, they are stored in the casing they are received,

src/libmongoc/src/mongoc/mongoc-server-description.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ mongoc_server_description_reset (mongoc_server_description_t *sd)
6868
sd->max_write_batch_size = MONGOC_DEFAULT_WRITE_BATCH_SIZE;
6969
sd->session_timeout_minutes = MONGOC_NO_SESSIONS;
7070
sd->last_write_date_ms = -1;
71+
sd->hello_ok = false;
7172

7273
/* always leave last hello in an init-ed state until we destroy sd */
7374
bson_destroy (&sd->last_hello_response);
@@ -592,6 +593,10 @@ mongoc_server_description_handle_hello (mongoc_server_description_t *sd,
592593
if (!BSON_ITER_HOLDS_BOOL (&iter))
593594
goto failure;
594595
is_primary = bson_iter_bool (&iter);
596+
} else if (strcmp ("helloOk", bson_iter_key (&iter)) == 0) {
597+
if (!BSON_ITER_HOLDS_BOOL (&iter))
598+
goto failure;
599+
sd->hello_ok = bson_iter_bool (&iter);
595600
} else if (strcmp ("me", bson_iter_key (&iter)) == 0) {
596601
if (!BSON_ITER_HOLDS_UTF8 (&iter))
597602
goto failure;

src/libmongoc/src/mongoc/mongoc-server-monitor.c

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -322,15 +322,16 @@ _server_monitor_send_and_recv_opquery (mongoc_server_monitor_t *server_monitor,
322322

323323
static bool
324324
_server_monitor_polling_hello (mongoc_server_monitor_t *server_monitor,
325+
bool hello_ok,
325326
bson_t *hello_response,
326327
bson_error_t *error)
327328
{
328329
bson_t cmd;
329330
const bson_t *hello;
330331
bool ret;
331332

332-
hello = _mongoc_topology_scanner_get_hello_cmd (
333-
server_monitor->topology->scanner);
333+
hello = _mongoc_topology_scanner_get_monitoring_cmd (
334+
server_monitor->topology->scanner, hello_ok);
334335
bson_copy_to (hello, &cmd);
335336

336337
_server_monitor_append_cluster_time (server_monitor, &cmd);
@@ -600,7 +601,7 @@ _server_monitor_awaitable_hello_recv (mongoc_server_monitor_t *server_monitor,
600601
*/
601602
static bool
602603
_server_monitor_awaitable_hello (mongoc_server_monitor_t *server_monitor,
603-
const bson_t *topology_version,
604+
const mongoc_server_description_t *description,
604605
bson_t *hello_response,
605606
bool *cancelled,
606607
bson_error_t *error)
@@ -609,12 +610,12 @@ _server_monitor_awaitable_hello (mongoc_server_monitor_t *server_monitor,
609610
const bson_t *hello;
610611
bool ret = false;
611612

612-
hello = _mongoc_topology_scanner_get_hello_cmd (
613-
server_monitor->topology->scanner);
613+
hello = _mongoc_topology_scanner_get_monitoring_cmd (
614+
server_monitor->topology->scanner, description->hello_ok);
614615
bson_copy_to (hello, &cmd);
615616

616617
_server_monitor_append_cluster_time (server_monitor, &cmd);
617-
bson_append_document (&cmd, "topologyVersion", 15, topology_version);
618+
bson_append_document (&cmd, "topologyVersion", 15, &description->topology_version);
618619
bson_append_int32 (
619620
&cmd, "maxAwaitTimeMS", 14, server_monitor->heartbeat_frequency_ms);
620621
bson_append_utf8 (&cmd, "$db", 3, "admin", 5);
@@ -861,7 +862,7 @@ mongoc_server_monitor_check_server (
861862
MONITOR_LOG (server_monitor, "awaitable hello");
862863
ret = _server_monitor_awaitable_hello (
863864
server_monitor,
864-
&previous_description->topology_version,
865+
previous_description,
865866
&hello_response,
866867
cancelled,
867868
&error);
@@ -872,7 +873,7 @@ mongoc_server_monitor_check_server (
872873
awaited = false;
873874
_server_monitor_heartbeat_started (server_monitor, awaited);
874875
ret =
875-
_server_monitor_polling_hello (server_monitor, &hello_response, &error);
876+
_server_monitor_polling_hello (server_monitor, previous_description->hello_ok, &hello_response, &error);
876877

877878
exit:
878879
duration_us = _now_us () - start_us;
@@ -1092,6 +1093,7 @@ static BSON_THREAD_FUN (_server_monitor_thread, server_monitor_void)
10921093

10931094
static bool
10941095
_server_monitor_ping_server (mongoc_server_monitor_t *server_monitor,
1096+
bool hello_ok,
10951097
int64_t *rtt_ms)
10961098
{
10971099
bool ret = false;
@@ -1111,7 +1113,7 @@ _server_monitor_ping_server (mongoc_server_monitor_t *server_monitor,
11111113
if (server_monitor->stream) {
11121114
MONITOR_LOG (server_monitor, "rtt polling hello");
11131115
ret = _server_monitor_polling_hello (
1114-
server_monitor, &hello_response, &error);
1116+
server_monitor, hello_ok, &hello_response, &error);
11151117
if (ret) {
11161118
*rtt_ms = (_now_us () - start_us) / 1000;
11171119
}
@@ -1127,12 +1129,14 @@ _server_monitor_ping_server (mongoc_server_monitor_t *server_monitor,
11271129
static BSON_THREAD_FUN (_server_monitor_rtt_thread, server_monitor_void)
11281130
{
11291131
mongoc_server_monitor_t *server_monitor;
1132+
mongoc_server_description_t *sd;
11301133

11311134
server_monitor = (mongoc_server_monitor_t *) server_monitor_void;
11321135

11331136
while (true) {
11341137
int64_t rtt_ms;
11351138
bson_error_t error;
1139+
bool hello_ok;
11361140

11371141
bson_mutex_lock (&server_monitor->shared.mutex);
11381142
if (server_monitor->shared.state != MONGOC_THREAD_RUNNING) {
@@ -1141,10 +1145,16 @@ static BSON_THREAD_FUN (_server_monitor_rtt_thread, server_monitor_void)
11411145
}
11421146
bson_mutex_unlock (&server_monitor->shared.mutex);
11431147

1144-
_server_monitor_ping_server (server_monitor, &rtt_ms);
1145-
if (rtt_ms != MONGOC_RTT_UNSET) {
1146-
mongoc_server_description_t *sd;
1148+
bson_mutex_lock (&server_monitor->topology->mutex);
1149+
sd = mongoc_topology_description_server_by_id (
1150+
&server_monitor->topology->description,
1151+
server_monitor->description->id,
1152+
&error);
1153+
hello_ok = sd ? sd->hello_ok : false;
1154+
bson_mutex_unlock (&server_monitor->topology->mutex);
11471155

1156+
_server_monitor_ping_server (server_monitor, hello_ok, &rtt_ms);
1157+
if (rtt_ms != MONGOC_RTT_UNSET) {
11481158
bson_mutex_lock (&server_monitor->topology->mutex);
11491159
sd = mongoc_topology_description_server_by_id (
11501160
&server_monitor->topology->description,

src/libmongoc/src/mongoc/mongoc-topology-scanner-private.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ typedef struct mongoc_topology_scanner_node {
5454
int64_t last_used;
5555
int64_t last_failed;
5656
bool has_auth;
57+
bool hello_ok;
5758
mongoc_host_list_t host;
5859
struct mongoc_topology_scanner *ts;
5960

@@ -84,7 +85,8 @@ typedef struct mongoc_topology_scanner {
8485
int64_t connect_timeout_msec;
8586
mongoc_topology_scanner_node_t *nodes;
8687
bson_t hello_cmd;
87-
bson_t hello_cmd_with_handshake;
88+
bson_t legacy_hello_cmd;
89+
bson_t handshake_cmd;
8890
bson_t cluster_time;
8991
bool handshake_ok_to_send;
9092
const char *appname;
@@ -130,7 +132,8 @@ mongoc_topology_scanner_valid (mongoc_topology_scanner_t *ts);
130132
void
131133
mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
132134
const mongoc_host_list_t *host,
133-
uint32_t id);
135+
uint32_t id,
136+
bool hello_ok);
134137

135138
void
136139
mongoc_topology_scanner_scan (mongoc_topology_scanner_t *ts, uint32_t id);
@@ -194,7 +197,8 @@ _mongoc_topology_scanner_get_speculative_auth_mechanism (
194197
const mongoc_uri_t *uri);
195198

196199
const bson_t *
197-
_mongoc_topology_scanner_get_hello_cmd (mongoc_topology_scanner_t *ts);
200+
_mongoc_topology_scanner_get_monitoring_cmd (mongoc_topology_scanner_t *ts,
201+
bool hello_ok);
198202

199203
const bson_t *
200204
_mongoc_topology_scanner_get_handshake_cmd (mongoc_topology_scanner_t *ts);

src/libmongoc/src/mongoc/mongoc-topology-scanner.c

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -106,22 +106,25 @@ _jumpstart_other_acmds (mongoc_topology_scanner_node_t *node,
106106
static void
107107
_add_hello (mongoc_topology_scanner_t *ts)
108108
{
109-
bson_t *cmd = &ts->hello_cmd;
110109
mongoc_server_api_t *api = ts->api;
111110

111+
BSON_APPEND_INT32 (&ts->hello_cmd, "hello", 1);
112+
BSON_APPEND_BOOL (&ts->hello_cmd, "helloOk", true);
113+
114+
BSON_APPEND_INT32 (&ts->legacy_hello_cmd, HANDSHAKE_CMD_LEGACY_HELLO, 1);
115+
BSON_APPEND_BOOL (&ts->legacy_hello_cmd, "helloOk", true);
116+
112117
if (api) {
113-
BSON_APPEND_INT32 (cmd, "hello", 1);
114-
_mongoc_cmd_append_server_api (cmd, api);
115-
} else {
116-
BSON_APPEND_INT32 (cmd, HANDSHAKE_CMD_LEGACY_HELLO, 1);
118+
_mongoc_cmd_append_server_api (&ts->hello_cmd, api);
117119
}
118120
}
119121

120122
static void
121123
_init_hello (mongoc_topology_scanner_t *ts)
122124
{
123125
bson_init (&ts->hello_cmd);
124-
bson_init (&ts->hello_cmd_with_handshake);
126+
bson_init (&ts->legacy_hello_cmd);
127+
bson_init (&ts->handshake_cmd);
125128
bson_init (&ts->cluster_time);
126129

127130
_add_hello (ts);
@@ -131,7 +134,8 @@ static void
131134
_reset_hello (mongoc_topology_scanner_t *ts)
132135
{
133136
bson_reinit (&ts->hello_cmd);
134-
bson_reinit (&ts->hello_cmd_with_handshake);
137+
bson_reinit (&ts->legacy_hello_cmd);
138+
bson_reinit (&ts->handshake_cmd);
135139

136140
_add_hello (ts);
137141
}
@@ -241,9 +245,9 @@ _mongoc_topology_scanner_parse_speculative_authentication (
241245
}
242246

243247
static bool
244-
_build_hello_with_handshake (mongoc_topology_scanner_t *ts)
248+
_build_handshake_cmd (mongoc_topology_scanner_t *ts)
245249
{
246-
bson_t *doc = &ts->hello_cmd_with_handshake;
250+
bson_t *doc = &ts->handshake_cmd;
247251
bson_t subdoc;
248252
bson_iter_t iter;
249253
const char *key;
@@ -254,7 +258,7 @@ _build_hello_with_handshake (mongoc_topology_scanner_t *ts)
254258
char buf[16];
255259

256260
bson_destroy (doc);
257-
bson_copy_to (&ts->hello_cmd, doc);
261+
bson_copy_to (ts->api ? &ts->hello_cmd : &ts->legacy_hello_cmd, doc);
258262

259263
BSON_APPEND_DOCUMENT_BEGIN (doc, HANDSHAKE_FIELD, &subdoc);
260264
res = _mongoc_handshake_build_doc_with_application (&subdoc, ts->appname);
@@ -279,12 +283,13 @@ _build_hello_with_handshake (mongoc_topology_scanner_t *ts)
279283
}
280284

281285
const bson_t *
282-
_mongoc_topology_scanner_get_hello_cmd (mongoc_topology_scanner_t *ts)
286+
_mongoc_topology_scanner_get_monitoring_cmd (mongoc_topology_scanner_t *ts,
287+
bool hello_ok)
283288
{
284-
return &ts->hello_cmd;
289+
return hello_ok || ts->api ? &ts->hello_cmd : &ts->legacy_hello_cmd;
285290
}
286291

287-
/* Caller must lock topology->mutex to protect hello_cmd_with_handshake. This
292+
/* Caller must lock topology->mutex to protect handshake_cmd. This
288293
* is called at the start of the scan in _mongoc_topology_run_background, when a
289294
* node is added in _mongoc_topology_reconcile_add_nodes, or when running a
290295
* hello directly on a node in _mongoc_stream_run_hello. */
@@ -293,19 +298,19 @@ _mongoc_topology_scanner_get_handshake_cmd (mongoc_topology_scanner_t *ts)
293298
{
294299
/* If this is the first time using the node or if it's the first time
295300
* using it after a failure, build handshake doc */
296-
if (bson_empty (&ts->hello_cmd_with_handshake)) {
297-
ts->handshake_ok_to_send = _build_hello_with_handshake (ts);
301+
if (bson_empty (&ts->handshake_cmd)) {
302+
ts->handshake_ok_to_send = _build_handshake_cmd (ts);
298303
if (!ts->handshake_ok_to_send) {
299304
MONGOC_WARNING ("Handshake doc too big, not including in hello");
300305
}
301306
}
302307

303308
/* If the doc turned out to be too big */
304309
if (!ts->handshake_ok_to_send) {
305-
return &ts->hello_cmd;
310+
return ts->api ? &ts->hello_cmd : &ts->legacy_hello_cmd;
306311
}
307312

308-
return &ts->hello_cmd_with_handshake;
313+
return &ts->handshake_cmd;
309314
}
310315

311316
static void
@@ -320,7 +325,8 @@ _begin_hello_cmd (mongoc_topology_scanner_node_t *node,
320325

321326
if (node->last_used != -1 && node->last_failed == -1) {
322327
/* The node's been used before and not failed recently */
323-
bson_copy_to (&ts->hello_cmd, &cmd);
328+
bson_copy_to (_mongoc_topology_scanner_get_monitoring_cmd (ts, node->hello_ok),
329+
&cmd);
324330
} else {
325331
bson_copy_to (_mongoc_topology_scanner_get_handshake_cmd (ts), &cmd);
326332
}
@@ -428,7 +434,8 @@ mongoc_topology_scanner_destroy (mongoc_topology_scanner_t *ts)
428434

429435
mongoc_async_destroy (ts->async);
430436
bson_destroy (&ts->hello_cmd);
431-
bson_destroy (&ts->hello_cmd_with_handshake);
437+
bson_destroy (&ts->legacy_hello_cmd);
438+
bson_destroy (&ts->handshake_cmd);
432439
bson_destroy (&ts->cluster_time);
433440
mongoc_server_api_destroy (ts->api);
434441

@@ -449,7 +456,8 @@ mongoc_topology_scanner_valid (mongoc_topology_scanner_t *ts)
449456
void
450457
mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
451458
const mongoc_host_list_t *host,
452-
uint32_t id)
459+
uint32_t id,
460+
bool hello_ok)
453461
{
454462
mongoc_topology_scanner_node_t *node;
455463

@@ -461,6 +469,7 @@ mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
461469
node->ts = ts;
462470
node->last_failed = -1;
463471
node->last_used = -1;
472+
node->hello_ok = hello_ok;
464473
bson_init (&node->speculative_auth_response);
465474

466475
DL_APPEND (ts->nodes, node);
@@ -1357,7 +1366,7 @@ _jumpstart_other_acmds (mongoc_topology_scanner_node_t *node,
13571366
}
13581367
}
13591368

1360-
/* Caller must lock topology->mutex to protect hello_cmd_with_handshake. */
1369+
/* Caller must lock topology->mutex to protect handshake_cmd. */
13611370
void
13621371
_mongoc_topology_scanner_set_server_api (mongoc_topology_scanner_t *ts,
13631372
const mongoc_server_api_t *api)

src/libmongoc/src/mongoc/mongoc-topology.c

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,15 @@ _mongoc_topology_reconcile_add_nodes (mongoc_server_description_t *sd,
4343
mongoc_topology_t *topology)
4444
{
4545
mongoc_topology_scanner_t *scanner = topology->scanner;
46+
mongoc_topology_scanner_node_t *node;
4647

47-
/* quickly search by id, then check if a node for this host was retired in
48-
* this scan. */
49-
if (!mongoc_topology_scanner_get_node (scanner, sd->id) &&
50-
!mongoc_topology_scanner_has_node_for_host (scanner, &sd->host)) {
51-
mongoc_topology_scanner_add (scanner, &sd->host, sd->id);
48+
/* Search by ID and update hello_ok */
49+
node = mongoc_topology_scanner_get_node (scanner, sd->id);
50+
if (node) {
51+
node->hello_ok = sd->hello_ok;
52+
} else if (!mongoc_topology_scanner_has_node_for_host (scanner, &sd->host)) {
53+
/* A node for this host was retired in this scan. */
54+
mongoc_topology_scanner_add (scanner, &sd->host, sd->id, sd->hello_ok);
5255
mongoc_topology_scanner_scan (scanner, sd->id);
5356
}
5457

@@ -189,9 +192,9 @@ _mongoc_topology_scanner_cb (uint32_t id,
189192
_mongoc_topology_update_no_lock (
190193
id, hello_response, rtt_msec, topology, error);
191194

192-
/* The processing of the hello results above may have added/removed
193-
* server descriptions. We need to reconcile that with our monitoring
194-
* agents
195+
/* The processing of the hello results above may have added, changed, or
196+
* removed server descriptions. We need to reconcile that with our
197+
* monitoring agents
195198
*/
196199
mongoc_topology_reconcile (topology);
197200

@@ -442,7 +445,7 @@ mongoc_topology_new (const mongoc_uri_t *uri, bool single_threaded)
442445
while (hl) {
443446
mongoc_topology_description_add_server (
444447
&topology->description, hl->host_and_port, &id);
445-
mongoc_topology_scanner_add (topology->scanner, hl, id);
448+
mongoc_topology_scanner_add (topology->scanner, hl, id, false);
446449

447450
hl = hl->next;
448451
}

src/libmongoc/tests/json-test.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
#include "json-test-monitoring.h"
2727
#include "json-test-operations.h"
2828

29-
#define MAX_NUM_TESTS 100
29+
#define MAX_NUM_TESTS 150
3030

3131
typedef void (*test_hook) (bson_t *test);
3232

src/libmongoc/tests/json/server_discovery_and_monitoring/errors/error_handling_handshake.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
"a:27017",
1010
{
1111
"ok": 1,
12-
"ismaster": true,
12+
"isWritablePrimary": true,
1313
"hosts": [
1414
"a:27017"
1515
],

src/libmongoc/tests/json/server_discovery_and_monitoring/errors/non-stale-network-error.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"a:27017",
1010
{
1111
"ok": 1,
12-
"ismaster": true,
12+
"isWritablePrimary": true,
1313
"hosts": [
1414
"a:27017"
1515
],

0 commit comments

Comments
 (0)