Skip to content

Commit 3535200

Browse files
hpatrozuiderkwast
authored and committed
Send duplicate multi meet packet only for node which supports it (#2840)
This prevents crashes on the older nodes in mixed clusters where some nodes are running 8.0 or older. Mixed clusters often exist temporarily during rolling upgrades. Fixes: #2341 Signed-off-by: Harkrishn Patro <[email protected]>
1 parent 032e3cb commit 3535200

File tree

2 files changed

+28
-15
lines changed

2 files changed

+28
-15
lines changed

src/cluster_legacy.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,8 @@ void clusterUpdateMyselfFlags(void) {
11451145
myself->flags |= nofailover;
11461146
myself->flags |= CLUSTER_NODE_EXTENSIONS_SUPPORTED |
11471147
CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED |
1148-
CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED;
1148+
CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED |
1149+
CLUSTER_NODE_MULTI_MEET_SUPPORTED;
11491150
if (myself->flags != oldflags) {
11501151
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE);
11511152
}
@@ -3675,6 +3676,13 @@ int clusterProcessPacket(clusterLink *link) {
36753676
} else {
36763677
sender->flags &= ~CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED;
36773678
}
3679+
3680+
/* Check if the node can handle multi meet packet. */
3681+
if (flags & CLUSTER_NODE_MULTI_MEET_SUPPORTED) {
3682+
sender->flags |= CLUSTER_NODE_MULTI_MEET_SUPPORTED;
3683+
} else {
3684+
sender->flags &= ~CLUSTER_NODE_MULTI_MEET_SUPPORTED;
3685+
}
36783686
}
36793687

36803688
/* Update the last time we saw any data from this node. We
@@ -5727,7 +5735,8 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t now, long
57275735
}
57285736
if (nodeInNormalState(node) && node->link != NULL && node->inbound_link == NULL &&
57295737
now - node->inbound_link_freed_time > getHandshakeTimeout() &&
5730-
now - node->meet_sent > getHandshakeTimeout()) {
5738+
now - node->meet_sent > getHandshakeTimeout() &&
5739+
nodeSupportsMultiMeet(node)) {
57315740
/* Node has an outbound link, but no inbound link for more than the handshake timeout.
57325741
* This probably means this node does not know us yet, whereas we know it.
57335742
* So we send it a MEET packet to do a handshake with it and correct the inconsistent cluster view.

src/cluster_legacy.h

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,22 @@ typedef struct clusterLink {
4949
#define linkSupportsExtension(link) ((link)->flags & CLUSTER_LINK_EXTENSIONS_SUPPORTED)
5050

5151
/* Cluster node flags and macros. */
52-
#define CLUSTER_NODE_PRIMARY (1 << 0) /* The node is a primary */
53-
#define CLUSTER_NODE_REPLICA (1 << 1) /* The node is a replica */
54-
#define CLUSTER_NODE_PFAIL (1 << 2) /* Failure? Need acknowledge */
55-
#define CLUSTER_NODE_FAIL (1 << 3) /* The node is believed to be malfunctioning */
56-
#define CLUSTER_NODE_MYSELF (1 << 4) /* This node is myself */
57-
#define CLUSTER_NODE_HANDSHAKE (1 << 5) /* We have still to exchange the first ping */
58-
#define CLUSTER_NODE_NOADDR (1 << 6) /* We don't know the address of this node */
59-
#define CLUSTER_NODE_MEET (1 << 7) /* Send a MEET message to this node */
60-
#define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */
61-
#define CLUSTER_NODE_NOFAILOVER (1 << 9) /* Replica will not try to failover. */
62-
#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */
63-
#define CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED (1 << 11) /* This node supports light message header for publish type. */
64-
#define CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED (1 << 12) /* This node supports light message header for module type. */
52+
#define CLUSTER_NODE_PRIMARY (1 << 0) /* The node is a primary */
53+
#define CLUSTER_NODE_REPLICA (1 << 1) /* The node is a replica */
54+
#define CLUSTER_NODE_PFAIL (1 << 2) /* Failure? Need acknowledge */
55+
#define CLUSTER_NODE_FAIL (1 << 3) /* The node is believed to be malfunctioning */
56+
#define CLUSTER_NODE_MYSELF (1 << 4) /* This node is myself */
57+
#define CLUSTER_NODE_HANDSHAKE (1 << 5) /* We have still to exchange the first ping */
58+
#define CLUSTER_NODE_NOADDR (1 << 6) /* We don't know the address of this node */
59+
#define CLUSTER_NODE_MEET (1 << 7) /* Send a MEET message to this node */
60+
#define CLUSTER_NODE_MIGRATE_TO (1 << 8) /* Primary eligible for replica migration. */
61+
#define CLUSTER_NODE_NOFAILOVER (1 << 9) /* Replica will not try to failover. */
62+
#define CLUSTER_NODE_EXTENSIONS_SUPPORTED (1 << 10) /* This node supports extensions. */
63+
#define CLUSTER_NODE_LIGHT_HDR_PUBLISH_SUPPORTED (1 << 11) /* This node supports light message header for publish type. */
64+
#define CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED (1 << 12) /* This node supports light message header for module type. */
65+
#define CLUSTER_NODE_MULTI_MEET_SUPPORTED CLUSTER_NODE_LIGHT_HDR_MODULE_SUPPORTED /* This node handles multi meet packet. \
66+
Light hdr for module and multi meet were both introduced in 8.1, \
67+
so we can reuse the same flag value. */
6568
#define CLUSTER_NODE_NULL_NAME \
6669
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
6770
"\000\000\000\000\000\000\000\000\000\000\000\000"
@@ -75,6 +78,7 @@ typedef struct clusterLink {
7578
#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
7679
#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER)
7780
#define nodeSupportsExtensions(n) ((n)->flags & CLUSTER_NODE_EXTENSIONS_SUPPORTED)
81+
#define nodeSupportsMultiMeet(n) ((n)->flags & CLUSTER_NODE_MULTI_MEET_SUPPORTED)
7882
#define nodeInNormalState(n) (!((n)->flags & (CLUSTER_NODE_HANDSHAKE | CLUSTER_NODE_MEET | CLUSTER_NODE_PFAIL | CLUSTER_NODE_FAIL)))
7983

8084
/* Cluster messages header */

0 commit comments

Comments
 (0)