Skip to content

Commit c2c8c3e

Browse files
hpatro authored and zuiderkwast committed
Send duplicate multi meet packet only for node which supports it (valkey-io#2840)
This prevents crashes on the older nodes in mixed clusters where some nodes are running 8.0 or older. Mixed clusters often exist temporarily during rolling upgrades. Fixes: valkey-io#2341 Signed-off-by: Harkrishn Patro <[email protected]>
1 parent 3b88eb0 commit c2c8c3e

File tree

2 files changed

+29
-15
lines changed

2 files changed

+29
-15
lines changed

src/cluster_legacy.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,9 @@ void clusterUpdateMyselfFlags(void) {
10201020
int nofailover = server.cluster_replica_no_failover ? CLUSTER_NODE_NOFAILOVER : 0;
10211021
myself->flags &= ~CLUSTER_NODE_NOFAILOVER;
10221022
myself->flags |= nofailover;
1023-
myself->flags |= CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED | CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED;
1023+
myself->flags |= CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED |
1024+
CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED |
1025+
CLUSTER_NODE_MULTI_MEET_SUPPORTED;
10241026
if (myself->flags != oldflags) {
10251027
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE);
10261028
}
@@ -3282,6 +3284,13 @@ int clusterProcessPacket(clusterLink *link) {
32823284
} else {
32833285
sender->flags &= ~CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED;
32843286
}
3287+
3288+
/* Check if the node can handle multi meet packet. */
3289+
if (flags & CLUSTER_NODE_MULTI_MEET_SUPPORTED) {
3290+
sender->flags |= CLUSTER_NODE_MULTI_MEET_SUPPORTED;
3291+
} else {
3292+
sender->flags &= ~CLUSTER_NODE_MULTI_MEET_SUPPORTED;
3293+
}
32853294
}
32863295

32873296
/* Update the last time we saw any data from this node. We
@@ -5279,7 +5288,8 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t now) {
52795288
}
52805289
if (nodeInNormalState(node) && node->link != NULL && node->inbound_link == NULL &&
52815290
now - node->inbound_link_freed_time > getHandshakeTimeout() &&
5282-
now - node->meet_sent > getHandshakeTimeout()) {
5291+
now - node->meet_sent > getHandshakeTimeout() &&
5292+
nodeSupportsMultiMeet(node)) {
52835293
/* Node has an outbound link, but no inbound link for more than the handshake timeout.
52845294
* This probably means this node does not know us yet, whereas we know it.
52855295
* So we send it a MEET packet to do a handshake with it and correct the inconsistent cluster view.

src/cluster_legacy.h

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,22 @@ typedef struct clusterLink {
4141
} clusterLink;
4242

4343
/* Cluster node flags and macros. */
44-
#define CLUSTER_NODE_PRIMARY (1 << 0) /* The node is a primary */
45-
#define CLUSTER_NODE_REPLICA (1 << 1) /* The node is a replica */
46-
#define CLUSTER_NODE_PFAIL (1 << 2) /* Failure? Need acknowledge */
47-
#define CLUSTER_NODE_FAIL (1 << 3) /* The node is believed to be malfunctioning */
48-
#define CLUSTER_NODE_MYSELF (1 << 4) /* This node is myself */
49-
#define CLUSTER_NODE_HANDSHAKE (1 << 5) /* We have still to exchange the first ping */
50-
#define CLUSTER_NODE_NOADDR (1 << 6) /* We don't know the address of this node */
51-
#define CLUSTER_NODE_MEET (1 << 7) /* Send a MEET message to this node */
52-
#define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */
53-
#define CLUSTER_NODE_NOFAILOVER (1 << 9) /* Replica will not try to failover. */
54-
#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */
55-
#define CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED (1 << 11) /* This node supports light message header for publish type. */
56-
#define CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED (1 << 12) /* This node supports light message header for module type. */
44+
#define CLUSTER_NODE_PRIMARY (1 << 0) /* The node is a primary */
45+
#define CLUSTER_NODE_REPLICA (1 << 1) /* The node is a replica */
46+
#define CLUSTER_NODE_PFAIL (1 << 2) /* Failure? Need acknowledge */
47+
#define CLUSTER_NODE_FAIL (1 << 3) /* The node is believed to be malfunctioning */
48+
#define CLUSTER_NODE_MYSELF (1 << 4) /* This node is myself */
49+
#define CLUSTER_NODE_HANDSHAKE (1 << 5) /* We have still to exchange the first ping */
50+
#define CLUSTER_NODE_NOADDR (1 << 6) /* We don't know the address of this node */
51+
#define CLUSTER_NODE_MEET (1 << 7) /* Send a MEET message to this node */
52+
#define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */
53+
#define CLUSTER_NODE_NOFAILOVER (1 << 9) /* Replica will not try to failover. */
54+
#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */
55+
#define CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED (1 << 11) /* This node supports light message header for publish type. */
56+
#define CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED (1 << 12) /* This node supports light message header for module type. */
57+
#define CLUSTER_NODE_MULTI_MEET_SUPPORTED CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED /* This node handles multi meet packet. \
58+
Light hdr for module and multi meet were both introduced in 8.1, \
59+
so we could reuse the same flag value. */
5760
#define CLUSTER_NODE_NULL_NAME \
5861
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
5962
"\000\000\000\000\000\000\000\000\000\000\000\000"
@@ -67,6 +70,7 @@ typedef struct clusterLink {
6770
#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
6871
#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER)
6972
#define nodeSupportsExtensions(n) ((n)->flags & CLUSTER_NODE_EXTENSIONS_SUPPORTED)
73+
#define nodeSupportsMultiMeet(n) ((n)->flags & CLUSTER_NODE_MULTI_MEET_SUPPORTED)
7074
#define nodeInNormalState(n) (!((n)->flags & (CLUSTER_NODE_HANDSHAKE | CLUSTER_NODE_MEET | CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL)))
7175

7276
/* This structure represent elements of node->fail_reports. */

0 commit comments

Comments
 (0)