Skip to content

Commit 635be2f

Browse files
authored
CDRIVER-4058 Load balancer: Error handling (#831)
1 parent 464fcda commit 635be2f

21 files changed

+760
-73
lines changed

src/libmongoc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,7 @@ set (SOURCES ${SOURCES}
508508
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-database.c
509509
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-error.c
510510
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-find-and-modify.c
511+
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-generation-map.c
511512
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-init.c
512513
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-gridfs.c
513514
${PROJECT_SOURCE_DIR}/src/mongoc/mongoc-gridfs-bucket.c
@@ -951,6 +952,7 @@ set (test-libmongoc-sources
951952
${PROJECT_SOURCE_DIR}/tests/test-mongoc-error.c
952953
${PROJECT_SOURCE_DIR}/tests/test-mongoc-exhaust.c
953954
${PROJECT_SOURCE_DIR}/tests/test-mongoc-find-and-modify.c
955+
${PROJECT_SOURCE_DIR}/tests/test-mongoc-generation-map.c
954956
${PROJECT_SOURCE_DIR}/tests/test-mongoc-gridfs-bucket.c
955957
${PROJECT_SOURCE_DIR}/tests/test-mongoc-gridfs-file-page.c
956958
${PROJECT_SOURCE_DIR}/tests/test-mongoc-gridfs.c

src/libmongoc/src/mongoc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ set (src_libmongoc_src_mongoc_DIST_noinst_hs
108108
mongoc-error-private.h
109109
mongoc-find-and-modify-private.h
110110
mongoc-flags-private.h
111+
mongoc-generation-map-private.h
111112
mongoc-gridfs-bucket-file-private.h
112113
mongoc-gridfs-bucket-private.h
113114
mongoc-gridfs-file-list-private.h
@@ -205,6 +206,7 @@ set (src_libmongoc_src_mongoc_DIST_cs
205206
mongoc-database.c
206207
mongoc-error.c
207208
mongoc-find-and-modify.c
209+
mongoc-generation-map.c
208210
mongoc-host-list.c
209211
mongoc-init.c
210212
mongoc-interrupt.c

src/libmongoc/src/mongoc/mongoc-cluster-private.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ BSON_BEGIN_DECLS
4444
typedef struct _mongoc_cluster_node_t {
4545
mongoc_stream_t *stream;
4646
char *connection_address;
47-
uint32_t generation;
4847
/* handshake_sd is a server description created from the handshake on the stream. */
4948
mongoc_server_description_t *handshake_sd;
5049
} mongoc_cluster_node_t;

src/libmongoc/src/mongoc/mongoc-cluster.c

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ _handle_not_primary_error (mongoc_cluster_t *cluster,
102102
reply,
103103
NULL,
104104
server_stream->sd->max_wire_version,
105-
server_stream->sd->generation)) {
105+
server_stream->sd->generation,
106+
&server_stream->sd->service_id)) {
106107
mongoc_cluster_disconnect_node (cluster, server_id);
107108
}
108109
bson_mutex_unlock (&cluster->client->topology->mutex);
@@ -138,7 +139,8 @@ _handle_network_error (mongoc_cluster_t *cluster,
138139
NULL,
139140
why,
140141
server_stream->sd->max_wire_version,
141-
server_stream->sd->generation);
142+
server_stream->sd->generation,
143+
&server_stream->sd->service_id);
142144
bson_mutex_unlock (&topology->mutex);
143145
/* Always disconnect the current connection on network error. */
144146
mongoc_cluster_disconnect_node (cluster, server_id);
@@ -1959,7 +1961,6 @@ _mongoc_cluster_node_dtor (void *data_, void *ctx_)
19591961

19601962
static mongoc_cluster_node_t *
19611963
_mongoc_cluster_node_new (mongoc_stream_t *stream,
1962-
uint32_t generation,
19631964
const char *connection_address)
19641965
{
19651966
mongoc_cluster_node_t *node;
@@ -1972,7 +1973,6 @@ _mongoc_cluster_node_new (mongoc_stream_t *stream,
19721973

19731974
node->stream = stream;
19741975
node->connection_address = bson_strdup (connection_address);
1975-
node->generation = generation;
19761976

19771977
return node;
19781978
}
@@ -2065,7 +2065,6 @@ _mongoc_cluster_finish_speculative_auth (
20652065
*/
20662066
static mongoc_cluster_node_t *
20672067
_mongoc_cluster_add_node (mongoc_cluster_t *cluster,
2068-
uint32_t generation,
20692068
uint32_t server_id,
20702069
bson_error_t *error /* OUT */)
20712070
{
@@ -2104,7 +2103,7 @@ _mongoc_cluster_add_node (mongoc_cluster_t *cluster,
21042103

21052104
/* take critical fields from a fresh hello */
21062105
cluster_node =
2107-
_mongoc_cluster_node_new (stream, generation, host->host_and_port);
2106+
_mongoc_cluster_node_new (stream, host->host_and_port);
21082107

21092108
handshake_sd = _mongoc_cluster_run_hello (cluster,
21102109
cluster_node,
@@ -2140,10 +2139,14 @@ _mongoc_cluster_add_node (mongoc_cluster_t *cluster,
21402139

21412140
/* Transfer ownership of the server description into the cluster node. */
21422141
cluster_node->handshake_sd = handshake_sd;
2143-
/* Copy the generation from the cluster node.
2142+
/* Copy the latest connection pool generation.
21442143
* TODO (CDRIVER-4078) do not store the generation counter on the server
21452144
* description */
2146-
cluster_node->handshake_sd->generation = generation;
2145+
bson_mutex_lock (&cluster->client->topology->mutex);
2146+
cluster_node->handshake_sd->generation =
2147+
_mongoc_topology_get_connection_pool_generation (
2148+
cluster->client->topology, server_id, &handshake_sd->service_id);
2149+
bson_mutex_unlock (&cluster->client->topology->mutex);
21472150

21482151
bson_destroy (&speculative_auth_response);
21492152
mongoc_set_add (cluster->nodes, server_id, cluster_node);
@@ -2268,27 +2271,39 @@ _mongoc_cluster_stream_for_server (mongoc_cluster_t *cluster,
22682271
* into account.
22692272
*/
22702273

2274+
/* Add a transient transaction label if applicable. */
2275+
_mongoc_bson_init_with_transient_txn_error (cs, reply);
2276+
2277+
/* When establishing a new connection in load balanced mode, drivers MUST
2278+
* NOT perform SDAM error handling for any errors that occur before the
2279+
* MongoDB Handshake. */
2280+
bson_mutex_lock (&topology->mutex);
2281+
if (topology->description.type == MONGOC_TOPOLOGY_LOAD_BALANCED) {
2282+
bson_mutex_unlock (&topology->mutex);
2283+
return NULL;
2284+
}
2285+
bson_mutex_unlock (&topology->mutex);
2286+
22712287
mongoc_topology_invalidate_server (topology, server_id, err_ptr);
22722288
mongoc_cluster_disconnect_node (cluster, server_id);
22732289
bson_mutex_lock (&topology->mutex);
2274-
_mongoc_topology_clear_connection_pool (topology, server_id);
2290+
/* This is not load balanced mode, so there are no service IDs associated
2291+
* with connections. Pass kZeroServiceId to clear the entire connection
2292+
* pool to this server. */
2293+
_mongoc_topology_clear_connection_pool (topology, server_id, &kZeroServiceId);
22752294
if (!topology->single_threaded) {
22762295
_mongoc_topology_background_monitoring_cancel_check (topology,
22772296
server_id);
22782297
}
22792298
bson_mutex_unlock (&topology->mutex);
2280-
_mongoc_bson_init_with_transient_txn_error (cs, reply);
22812299
return NULL;
22822300
}
22832301

22842302
/* If this is a load balanced topology and the server stream does not have a
22852303
* service id, disconnect and return an error. */
22862304
bson_mutex_lock (&topology->mutex);
22872305
if (topology->description.type == MONGOC_TOPOLOGY_LOAD_BALANCED) {
2288-
bson_oid_t service_id;
2289-
2290-
if (!mongoc_server_description_service_id (server_stream->sd,
2291-
&service_id)) {
2306+
if (!mongoc_server_description_has_service_id (server_stream->sd)) {
22922307
bson_set_error (error,
22932308
MONGOC_ERROR_CLIENT,
22942309
MONGOC_ERROR_CLIENT_INVALID_LOAD_BALANCER,
@@ -2380,7 +2395,6 @@ mongoc_cluster_fetch_stream_single (mongoc_cluster_t *cluster,
23802395
bson_error_t *error /* OUT */)
23812396
{
23822397
mongoc_topology_t *topology;
2383-
mongoc_server_description_t *monitor_sd;
23842398
mongoc_server_description_t *handshake_sd;
23852399
mongoc_topology_scanner_node_t *scanner_node;
23862400
char *address;
@@ -2482,16 +2496,11 @@ mongoc_cluster_fetch_stream_single (mongoc_cluster_t *cluster,
24822496
scanner_node->has_auth = true;
24832497
}
24842498

2485-
/* Always copy the latest generation from the shared server description. */
2486-
monitor_sd = mongoc_topology_server_by_id (topology, server_id, error);
2487-
if (!monitor_sd) {
2488-
mongoc_server_description_destroy (handshake_sd);
2489-
return NULL;
2490-
}
2491-
/* TODO: (CDRIVER-4078) do not store the generation counter as part of the
2492-
* server description. */
2493-
handshake_sd->generation = monitor_sd->generation;
2494-
mongoc_server_description_destroy (monitor_sd);
2499+
/* Copy the latest connection pool generation.
2500+
* TODO (CDRIVER-4078) do not store the generation counter on the server
2501+
* description */
2502+
handshake_sd->generation = _mongoc_topology_get_connection_pool_generation (
2503+
cluster->client->topology, server_id, &handshake_sd->service_id);
24952504

24962505
return mongoc_server_stream_new (
24972506
&topology->description, handshake_sd, scanner_node->stream);
@@ -2540,7 +2549,10 @@ mongoc_cluster_stream_valid (mongoc_cluster_t *cluster,
25402549
bson_mutex_lock (&topology->mutex);
25412550
sd = mongoc_topology_description_server_by_id (
25422551
&topology->description, server_stream->sd->id, &error);
2543-
if (!sd || server_stream->sd->generation < sd->generation) {
2552+
if (!sd ||
2553+
server_stream->sd->generation <
2554+
_mongoc_topology_get_connection_pool_generation (
2555+
topology, server_stream->sd->id, &server_stream->sd->service_id)) {
25442556
/* No server description, or the pool has been cleared. */
25452557
bson_mutex_unlock (&topology->mutex);
25462558
goto done;
@@ -2585,7 +2597,6 @@ mongoc_cluster_fetch_stream_pooled (mongoc_cluster_t *cluster,
25852597
mongoc_cluster_node_t *cluster_node;
25862598
mongoc_server_description_t *sd;
25872599
bool has_server_description = false;
2588-
uint32_t generation = 0;
25892600

25902601
cluster_node =
25912602
(mongoc_cluster_node_t *) mongoc_set_get (cluster->nodes, server_id);
@@ -2596,14 +2607,21 @@ mongoc_cluster_fetch_stream_pooled (mongoc_cluster_t *cluster,
25962607
&topology->description, server_id, error);
25972608
if (sd) {
25982609
has_server_description = true;
2599-
generation = sd->generation;
26002610
}
26012611
bson_mutex_unlock (&topology->mutex);
26022612

26032613
if (cluster_node) {
2614+
uint32_t connection_pool_generation = 0;
26042615
BSON_ASSERT (cluster_node->stream);
26052616

2606-
if (!has_server_description || cluster_node->generation < generation) {
2617+
bson_mutex_lock (&topology->mutex);
2618+
connection_pool_generation =
2619+
_mongoc_topology_get_connection_pool_generation (
2620+
topology, server_id, &cluster_node->handshake_sd->service_id);
2621+
bson_mutex_unlock (&topology->mutex);
2622+
2623+
if (!has_server_description ||
2624+
cluster_node->handshake_sd->generation < connection_pool_generation) {
26072625
/* Since the stream was created, connections to this server were
26082626
* invalidated.
26092627
* This may have happened if:
@@ -2626,7 +2644,7 @@ mongoc_cluster_fetch_stream_pooled (mongoc_cluster_t *cluster,
26262644
}
26272645

26282646
cluster_node =
2629-
_mongoc_cluster_add_node (cluster, generation, server_id, error);
2647+
_mongoc_cluster_add_node (cluster, server_id, error);
26302648
if (cluster_node) {
26312649
return _mongoc_cluster_create_server_stream (
26322650
topology, cluster_node->handshake_sd, cluster_node->stream, error);
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright 2021-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "mongoc-prelude.h"
18+
19+
#include "bson/bson.h"
20+
21+
#ifndef MONGOC_GENERATION_MAP_PRIVATE_H
22+
#define MONGOC_GENERATION_MAP_PRIVATE_H
23+
24+
/* mongoc_generation_map_t maps a BSON ObjectID to an unsigned integer.
25+
* It is used to track connection generations. */
26+
typedef struct _mongoc_generation_map mongoc_generation_map_t;
27+
28+
mongoc_generation_map_t *
29+
mongoc_generation_map_new ();
30+
31+
mongoc_generation_map_t *
32+
mongoc_generation_map_copy (const mongoc_generation_map_t *gm);
33+
34+
uint32_t
35+
mongoc_generation_map_get (const mongoc_generation_map_t *gm,
36+
const bson_oid_t *key);
37+
38+
void
39+
mongoc_generation_map_increment (mongoc_generation_map_t *gm,
40+
const bson_oid_t *key);
41+
42+
void
43+
mongoc_generation_map_destroy (mongoc_generation_map_t *gm);
44+
45+
#endif /* MONGOC_GENERATION_MAP_PRIVATE_H */

0 commit comments

Comments
 (0)