Skip to content

Commit ddd99c2

Browse files
committed
CDRIVER-751 check discovered servers in same scan
Bug in unreleased Server Discovery And Monitoring implementation. The spec requires a single-threaded scan to keep going until all nodes have been checked, *including* nodes discovered during the scan. But _mongoc_topology_do_blocking_scan quit once all servers known at the *start* of the scan were checked. Discovered nodes are added to the topology description, but not checked until the next scan. With this patch, a block scan keeps going until all servers are checked, including those discovered during this scan. TODO: test_topology_scanner_oscillate currently fails, revealing that in a pathological scenario the driver now errs on the other side, and scans forever.
1 parent 9e58464 commit ddd99c2

7 files changed

+259
-26
lines changed

src/mongoc/mongoc-topology-scanner-private.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
8888
const mongoc_host_list_t *host,
8989
uint32_t id);
9090

91+
void
92+
mongoc_topology_scanner_add_and_scan (mongoc_topology_scanner_t *ts,
93+
const mongoc_host_list_t *host,
94+
uint32_t id,
95+
int64_t timeout_msec);
96+
9197
void
9298
mongoc_topology_scanner_node_destroy (mongoc_topology_scanner_node_t *node,
9399
bool failed);

src/mongoc/mongoc-topology-scanner.c

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,22 @@
2626
#endif
2727

2828
#include "mongoc-counters-private.h"
29-
#include "mongoc-async-private.h"
30-
#include "mongoc-async-cmd-private.h"
3129
#include "utlist.h"
3230
#include "mongoc-topology-private.h"
3331

3432
#undef MONGOC_LOG_DOMAIN
3533
#define MONGOC_LOG_DOMAIN "topology_scanner"
3634

35+
static bool
36+
mongoc_topology_scanner_node_setup (mongoc_topology_scanner_node_t *node);
37+
38+
static void
39+
mongoc_topology_scanner_ismaster_handler (mongoc_async_cmd_result_t async_status,
40+
const bson_t *ismaster_response,
41+
int64_t rtt_msec,
42+
void *data,
43+
bson_error_t *error);
44+
3745
mongoc_topology_scanner_t *
3846
mongoc_topology_scanner_new (const mongoc_uri_t *uri,
3947
mongoc_topology_scanner_cb_t cb,
@@ -87,10 +95,10 @@ mongoc_topology_scanner_destroy (mongoc_topology_scanner_t *ts)
8795
bson_free (ts);
8896
}
8997

90-
void
91-
mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
92-
const mongoc_host_list_t *host,
93-
uint32_t id)
98+
static mongoc_topology_scanner_node_t *
99+
_add_node (mongoc_topology_scanner_t *ts,
100+
const mongoc_host_list_t *host,
101+
uint32_t id)
94102
{
95103
mongoc_topology_scanner_node_t *node = (mongoc_topology_scanner_node_t *)bson_malloc0 (sizeof (*node));
96104

@@ -102,6 +110,37 @@ mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
102110

103111
DL_APPEND(ts->nodes, node);
104112

113+
return node;
114+
}
115+
116+
void
117+
mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
118+
const mongoc_host_list_t *host,
119+
uint32_t id)
120+
{
121+
_add_node (ts, host, id);
122+
}
123+
124+
125+
void
126+
mongoc_topology_scanner_add_and_scan (mongoc_topology_scanner_t *ts,
127+
const mongoc_host_list_t *host,
128+
uint32_t id,
129+
int64_t timeout_msec)
130+
{
131+
mongoc_topology_scanner_node_t *node;
132+
133+
node = _add_node (ts, host, id);
134+
135+
if (mongoc_topology_scanner_node_setup (node)) {
136+
node->cmd = mongoc_async_cmd (
137+
ts->async, node->stream, ts->setup,
138+
node->host.host, "admin",
139+
&ts->ismaster_cmd,
140+
&mongoc_topology_scanner_ismaster_handler,
141+
node, (int32_t) timeout_msec);
142+
}
143+
105144
return;
106145
}
107146

src/mongoc/mongoc-topology.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,33 @@ _mongoc_topology_request_scan (mongoc_topology_t *topology);
3232

3333
static bool
3434
_mongoc_topology_reconcile_add_nodes (void *item,
35-
void *ctx)
35+
void *ctx)
3636
{
37-
mongoc_server_description_t *sd = (mongoc_server_description_t *)item;
38-
mongoc_topology_scanner_t *scanner = (mongoc_topology_scanner_t *)ctx;
37+
mongoc_server_description_t *sd = item;
38+
mongoc_topology_t *topology = (mongoc_topology_t *)ctx;
39+
mongoc_topology_scanner_t *scanner = topology->scanner;
3940

4041
if (! mongoc_topology_scanner_get_node (scanner, sd->id)) {
41-
mongoc_topology_scanner_add (scanner, &sd->host, sd->id);
42+
mongoc_topology_scanner_add_and_scan (scanner, &sd->host, sd->id,
43+
topology->timeout_msec);
4244
}
4345

4446
return true;
4547
}
4648

4749
void
48-
mongoc_topology_reconcile (mongoc_topology_description_t *description,
49-
mongoc_topology_scanner_t *scanner) {
50+
mongoc_topology_reconcile (mongoc_topology_t *topology) {
5051
mongoc_topology_scanner_node_t *ele, *tmp;
52+
mongoc_topology_description_t *description;
53+
mongoc_topology_scanner_t *scanner;
54+
55+
description = &topology->description;
56+
scanner = topology->scanner;
5157

5258
/* Add newly discovered nodes */
53-
mongoc_set_for_each(description->servers, _mongoc_topology_reconcile_add_nodes, scanner);
59+
mongoc_set_for_each(description->servers,
60+
_mongoc_topology_reconcile_add_nodes,
61+
topology);
5462

5563
/* Remove removed nodes */
5664
DL_FOREACH_SAFE (scanner->nodes, ele, tmp) {
@@ -105,7 +113,7 @@ _mongoc_topology_scanner_cb (uint32_t id,
105113
* server descriptions. We need to reconcile that with our monitoring agents
106114
*/
107115

108-
mongoc_topology_reconcile(&topology->description, topology->scanner);
116+
mongoc_topology_reconcile(topology);
109117

110118
/* TODO only wake up all clients if we found any topology changes */
111119
mongoc_cond_broadcast (&topology->cond_client);

tests/TestSuite.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,18 @@ extern "C" {
9898
} \
9999
} while (0)
100100

101+
#define AWAIT(_condition) \
102+
do { \
103+
int64_t _start = bson_get_monotonic_time (); \
104+
while (! _condition) { \
105+
if (bson_get_monotonic_time() - _start > 1000 * 1000) { \
106+
fprintf (stderr, \
107+
"%s:%d %s(): \"%s\" still false after 1 second\n", \
108+
__FILE__, __LINE__, __FUNCTION__, #_condition); \
109+
abort (); \
110+
} \
111+
} \
112+
} while (0)
101113

102114
#define MAX_TEST_NAME_LENGTH 500
103115

tests/test-mongoc-topology-reconcile.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,8 @@ _test_topology_reconcile_rs (bool pooled)
131131
RS_RESPONSE_TO_ISMASTER (server1, true, false, server0, server1);
132132

133133
/* provide secondary in seed list */
134-
/* TODO: CDRIVER-751 serverSelectionTryOnce=false shouldn't be needed */
135134
uri_str = bson_strdup_printf (
136-
"mongodb://%s/?replicaSet=rs&connectTimeoutMS=10"
137-
"&serverSelectionTryOnce=false",
135+
"mongodb://%s/?replicaSet=rs",
138136
mock_server_get_host_and_port (server0));
139137

140138
uri = mongoc_uri_new (uri_str);
@@ -153,7 +151,6 @@ _test_topology_reconcile_rs (bool pooled)
153151

154152
/*
155153
* server0 is selected, server1 is discovered and added to scanner.
156-
* TODO: CDRIVER-751 actually it isn't.
157154
*/
158155
assert (selects_server (client, secondary_read_prefs, server0));
159156
assert (get_node (client->topology->scanner,
@@ -168,7 +165,16 @@ _test_topology_reconcile_rs (bool pooled)
168165
* remove server0 from set. tag primary, select w/ tags to trigger re-scan.
169166
*/
170167
RS_RESPONSE_TO_ISMASTER (server1, true, true, server1); /* server0 absent */
168+
169+
if (!pooled) {
170+
/* TODO: SPEC-217 this selection should trigger one rescan but doesn't */
171+
assert (!client->topology->stale);
172+
assert (!selects_server (client, tag_read_prefs, server1));
173+
assert (client->topology->stale);
174+
}
175+
171176
assert (selects_server (client, tag_read_prefs, server1));
177+
assert (!client->topology->stale);
172178

173179
assert (!get_node (client->topology->scanner,
174180
mock_server_get_host_and_port (server0)));

tests/test-mongoc-topology-scanner.c

Lines changed: 166 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
#include <mongoc.h>
22

3-
#include "mongoc-topology-scanner-private.h"
3+
#include "mongoc-util-private.h"
4+
#include "mongoc-client-private.h"
5+
46
#include "mongoc-tests.h"
57
#include "TestSuite.h"
68
#include "mock_server/mock-server.h"
9+
#include "mock_server/future.h"
10+
#include "mock_server/future-functions.h"
711

812
#undef MONGOC_LOG_DOMAIN
913
#define MONGOC_LOG_DOMAIN "topology-scanner-test"
@@ -125,11 +129,172 @@ test_topology_scanner_ssl ()
125129
#endif
126130

127131

132+
/*
133+
* Servers discovered by a scan should be checked during that scan, CDRIVER-751.
134+
*/
135+
void
136+
test_topology_scanner_discovery ()
137+
{
138+
mock_server_t *primary;
139+
mock_server_t *secondary;
140+
char *primary_response;
141+
char *secondary_response;
142+
mongoc_client_t *client;
143+
mongoc_topology_scanner_t *scanner;
144+
char *uri_str;
145+
mongoc_read_prefs_t *primary_pref;
146+
bson_error_t error;
147+
future_t *future;
148+
request_t *request;
149+
mongoc_server_description_t *sd;
150+
151+
primary = mock_server_new ();
152+
secondary = mock_server_new ();
153+
mock_server_run (primary);
154+
mock_server_run (secondary);
155+
156+
primary_response = bson_strdup_printf (
157+
"{'ok': 1, "
158+
" 'ismaster': true,"
159+
" 'setName': 'rs',"
160+
" 'hosts': ['%s', '%s']}",
161+
mock_server_get_host_and_port (primary),
162+
mock_server_get_host_and_port (secondary));
163+
164+
secondary_response = bson_strdup_printf (
165+
"{'ok': 1, "
166+
" 'ismaster': false,"
167+
" 'secondary': true,"
168+
" 'setName': 'rs',"
169+
" 'hosts': ['%s', '%s']}",
170+
mock_server_get_host_and_port (primary),
171+
mock_server_get_host_and_port (secondary));
172+
173+
uri_str = bson_strdup_printf ("mongodb://%s/?replicaSet=rs",
174+
mock_server_get_host_and_port (primary));
175+
client = mongoc_client_new (uri_str);
176+
scanner = client->topology->scanner;
177+
primary_pref = mongoc_read_prefs_new (MONGOC_READ_PRIMARY);
178+
179+
future = future_topology_select (client->topology, MONGOC_SS_READ,
180+
primary_pref, 15, &error);
181+
182+
/* a single scan discovers *and* checks the secondary */
183+
request = mock_server_receives_ismaster (primary);
184+
mock_server_replies_simple (request, primary_response);
185+
request_destroy (request);
186+
187+
/* let client process that response */
188+
_mongoc_usleep (250 * 1000);
189+
190+
/* a check of the secondary is scheduled in this scan */
191+
AWAIT (scanner->async->ncmds);
192+
193+
request = mock_server_receives_ismaster (secondary);
194+
mock_server_replies_simple (request, secondary_response);
195+
/* scan completes */
196+
AWAIT (!scanner->async->ncmds);
197+
ASSERT_OR_PRINT ((sd = future_get_mongoc_server_description_ptr (future)),
198+
error);
199+
200+
mongoc_server_description_destroy (sd);
201+
request_destroy (request);
202+
mongoc_read_prefs_destroy (primary_pref);
203+
bson_free (secondary_response);
204+
bson_free (primary_response);
205+
bson_free (uri_str);
206+
mongoc_client_destroy (client);
207+
mock_server_destroy (secondary);
208+
mock_server_destroy (primary);
209+
}
210+
211+
212+
/*
213+
* Check that scanner doesn't spinlock if two primaries point at each other.
214+
*/
215+
void
216+
test_topology_scanner_oscillate ()
217+
{
218+
mock_server_t *server0;
219+
mock_server_t *server1;
220+
char *server0_response;
221+
char *server1_response;
222+
mongoc_client_t *client;
223+
mongoc_topology_scanner_t *scanner;
224+
char *uri_str;
225+
mongoc_read_prefs_t *primary_pref;
226+
bson_error_t error;
227+
future_t *future;
228+
request_t *request;
229+
230+
server0 = mock_server_new ();
231+
server1 = mock_server_new ();
232+
mock_server_run (server0);
233+
mock_server_run (server1);
234+
235+
/* server 0 says it's primary, but only server 1 is in the set */
236+
server0_response = bson_strdup_printf (
237+
"{'ok': 1, "
238+
" 'ismaster': true,"
239+
" 'setName': 'rs',"
240+
" 'hosts': ['%s']}",
241+
mock_server_get_host_and_port (server1));
242+
243+
/* the opposite */
244+
server1_response = bson_strdup_printf (
245+
"{'ok': 1, "
246+
" 'ismaster': true,"
247+
" 'setName': 'rs',"
248+
" 'hosts': ['%s']}",
249+
mock_server_get_host_and_port (server0));
250+
251+
/* start with server 0 */
252+
uri_str = bson_strdup_printf ("mongodb://%s/?replicaSet=rs",
253+
mock_server_get_host_and_port (server0));
254+
client = mongoc_client_new (uri_str);
255+
scanner = client->topology->scanner;
256+
primary_pref = mongoc_read_prefs_new (MONGOC_READ_PRIMARY);
257+
258+
assert (!scanner->async->ncmds);
259+
future = future_topology_select (client->topology, MONGOC_SS_READ,
260+
primary_pref, 15, &error);
261+
262+
/* a single scan discovers servers 0 and 1 */
263+
request = mock_server_receives_ismaster (server0);
264+
mock_server_replies_simple (request, server0_response);
265+
request_destroy (request);
266+
267+
/* let client process that response */
268+
_mongoc_usleep (250 * 1000);
269+
AWAIT (scanner->async->ncmds);
270+
271+
request = mock_server_receives_ismaster (server1);
272+
mock_server_replies_simple (request, server1_response);
273+
AWAIT (!scanner->async->ncmds);
274+
275+
assert (!future_get_mongoc_server_description_ptr (future));
276+
request_destroy (request);
277+
mongoc_read_prefs_destroy (primary_pref);
278+
bson_free (server1_response);
279+
bson_free (server0_response);
280+
bson_free (uri_str);
281+
mongoc_client_destroy (client);
282+
mock_server_destroy (server1);
283+
mock_server_destroy (server0);
284+
}
285+
286+
128287
void
129288
test_topology_scanner_install (TestSuite *suite)
130289
{
131290
TestSuite_Add (suite, "/TOPOLOGY/scanner", test_topology_scanner);
132291
#ifdef MONGOC_ENABLE_SSL
133292
TestSuite_Add (suite, "/TOPOLOGY/scanner_ssl", test_topology_scanner_ssl);
134293
#endif
294+
TestSuite_Add (suite, "/TOPOLOGY/scanner_discovery",
295+
test_topology_scanner_discovery);
296+
/*
297+
TestSuite_Add (suite, "/TOPOLOGY/scanner_oscillate",
298+
test_topology_scanner_oscillate);
299+
*/
135300
}

0 commit comments

Comments
 (0)