Skip to content

Commit d4a56e7

Browse files
committed
CDRIVER-531 don't retry a host during one RS reconnect
Fix scenarios like:
* We get disconnected from the primary and start reconnecting
* Check the primary: it's down, time out after N seconds
* Check a secondary: it's up and tells us about the primary
* We re-register the primary and try it again
* It's still down, time out after another N seconds

To fix this, remember that we've tried host "H" during this reconnect and don't retry it, even if another host's peer list tells us to re-register "H".
1 parent 40853ed commit d4a56e7

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

src/mongoc/mongoc-cluster.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2001,6 +2001,52 @@ _mongoc_cluster_reconnect_direct (mongoc_cluster_t *cluster,
20012001
RETURN (true);
20022002
}
20032003

2004+
/*
 * Push a copy of `host` onto the front of `list`.
 *
 * A new node is allocated with bson_malloc, filled with a copy of *host,
 * and its `next` pointer is set to `list` (which may be NULL). The new
 * node, now the head of the list, is returned; `host` itself is not
 * modified or retained.
 */
mongoc_host_list_t *
prepend_host (const mongoc_host_list_t *host, mongoc_host_list_t *list)
{
   mongoc_host_list_t *head;

   head = bson_malloc (sizeof *head);

   /* Copy every field first, then overwrite the copied link. */
   *head = *host;
   head->next = list;

   return head;
}
2014+
2015+
/*
2016+
* Case-sensitive search for host-and-port.
2017+
*/
2018+
bool
2019+
has_host (const mongoc_host_list_t *hl,
2020+
const char *host_and_port)
2021+
{
2022+
printf ("has_host %s\n", host_and_port);
2023+
2024+
while (hl) {
2025+
if (!strcmp(hl->host_and_port, host_and_port)) {
2026+
printf ("\tyes\n");
2027+
return true;
2028+
}
2029+
2030+
hl = hl->next;
2031+
}
2032+
2033+
printf ("\tno\n");
2034+
return false;
2035+
}
2036+
2037+
2038+
void
2039+
host_list_destroy (mongoc_host_list_t *hl)
2040+
{
2041+
mongoc_host_list_t *tmp;
2042+
2043+
while (hl) {
2044+
tmp = hl->next;
2045+
bson_free (hl);
2046+
hl = tmp;
2047+
}
2048+
}
2049+
20042050

20052051
/*
20062052
*--------------------------------------------------------------------------
@@ -2031,6 +2077,7 @@ _mongoc_cluster_reconnect_replica_set (mongoc_cluster_t *cluster,
20312077
{
20322078
const mongoc_host_list_t *hosts;
20332079
const mongoc_host_list_t *iter;
2080+
mongoc_host_list_t *failed_hosts = NULL;
20342081
mongoc_cluster_node_t node;
20352082
mongoc_cluster_node_t *saved_nodes;
20362083
size_t saved_nodes_len;
@@ -2102,6 +2149,7 @@ _mongoc_cluster_reconnect_replica_set (mongoc_cluster_t *cluster,
21022149
stream = _mongoc_client_create_stream(cluster->client, iter, error);
21032150
if (!stream) {
21042151
MONGOC_WARNING("Failed connection to %s", iter->host_and_port);
2152+
failed_hosts = prepend_host (iter, failed_hosts);
21052153
continue;
21062154
}
21072155

@@ -2110,6 +2158,7 @@ _mongoc_cluster_reconnect_replica_set (mongoc_cluster_t *cluster,
21102158
node.stream = stream;
21112159

21122160
if (!_mongoc_cluster_ismaster (cluster, &node, error)) {
2161+
failed_hosts = prepend_host (iter, failed_hosts);
21132162
_mongoc_cluster_node_destroy (&node);
21142163
continue;
21152164
}
@@ -2157,6 +2206,11 @@ _mongoc_cluster_reconnect_replica_set (mongoc_cluster_t *cluster,
21572206
continue;
21582207
}
21592208

2209+
if (has_host (failed_hosts, host.host_and_port)) {
2210+
MONGOC_INFO ("Skipping reconnection to %s", host.host_and_port);
2211+
continue;
2212+
}
2213+
21602214
stream = NULL;
21612215

21622216
for (j = 0; j < saved_nodes_len; j++) {
@@ -2252,6 +2306,8 @@ _mongoc_cluster_reconnect_replica_set (mongoc_cluster_t *cluster,
22522306

22532307
bson_free(saved_nodes);
22542308

2309+
host_list_destroy (failed_hosts);
2310+
22552311
RETURN(rval);
22562312
}
22572313

0 commit comments

Comments
 (0)