@@ -1474,6 +1474,120 @@ public void assertMatched() {
14741474 }
14751475 }
14761476
1477+ // TODO:
1478+ // 1: Refactor this test and above
1479+ // 2: Add a test where it fails to begin with, then the handshake block is removed and it succeeds
1480+ @ TestLogging (
1481+ reason = "test includes assertions about logging" ,
1482+ value = "org.elasticsearch.cluster.coordination.Coordinator:WARN,org.elasticsearch.cluster.coordination.JoinHelper:INFO"
1483+ )
1484+ public void testDoesNotReportConnectBackProblemsDuringJoiningIfNodeIsInClusterState () throws Exception {
1485+ try (var cluster = new Cluster (3 )) {
1486+ cluster .runRandomly ();
1487+ cluster .stabilise ();
1488+
1489+ final var partitionedNode = cluster .getAnyNode ();
1490+ partitionedNode .disconnect ();
1491+ cluster .stabilise ();
1492+
1493+ logger .info ("--> removed [{}] but adding to master's cluster state" , partitionedNode );
1494+ final ClusterNode leader = cluster .getAnyLeader ();
1495+ leader .submitUpdateTask ("updating cluster state" ,
1496+ cs -> {
1497+ ClusterState cs2 = ClusterState .builder (cs )
1498+ .nodes (DiscoveryNodes .builder (cs .nodes ())
1499+ .add (partitionedNode .getLocalNode ())
1500+ .build ())
1501+ .build ();
1502+ // Insert breakpoint here
1503+ return cs2 ;
1504+ },
1505+ (e ) -> {}
1506+ );
1507+
1508+ logger .info ("--> healing [{}] but blocking handshakes" , partitionedNode );
1509+ partitionedNode .heal ();
1510+ leader .addActionBlock (TransportService .HANDSHAKE_ACTION_NAME );
1511+
1512+ try (var mockLog = MockLog .capture (Coordinator .class , JoinHelper .class )) {
1513+
1514+ // Since the node is in the cluster state, we do not expect this log
1515+ mockLog .addExpectation (
1516+ new MockLog .UnseenEventExpectation (
1517+ "connect-back failure" ,
1518+ Coordinator .class .getCanonicalName (),
1519+ Level .WARN ,
1520+ "*received join request from ["
1521+ + partitionedNode .getLocalNode ().descriptionWithoutAttributes ()
1522+ + "] but could not connect back to the joining node"
1523+ )
1524+ );
1525+
1526+ // We do not expect an info log from JoinHelper about the handshake failure
1527+ mockLog .addExpectation (new MockLog .LoggingExpectation () {
1528+ boolean matched = false ;
1529+
1530+ @ Override
1531+ public void match (LogEvent event ) {
1532+ if (event .getLevel () != Level .INFO ) {
1533+ return ;
1534+ }
1535+ if (event .getLoggerName ().equals (JoinHelper .class .getCanonicalName ()) == false ) {
1536+ return ;
1537+ }
1538+
1539+ var cause = event .getThrown ();
1540+ if (cause == null ) {
1541+ return ;
1542+ }
1543+ cause = cause .getCause ();
1544+ if (cause == null ) {
1545+ return ;
1546+ }
1547+ if (Regex .simpleMatch (
1548+ "* failure when opening connection back from ["
1549+ + leader .getLocalNode ().descriptionWithoutAttributes ()
1550+ + "] to ["
1551+ + partitionedNode .getLocalNode ().descriptionWithoutAttributes ()
1552+ + "]" ,
1553+ cause .getMessage ()
1554+ ) == false ) {
1555+ return ;
1556+ }
1557+ if (cause .getStackTrace () != null && cause .getStackTrace ().length != 0 ) {
1558+ return ;
1559+ }
1560+ matched = true ;
1561+ }
1562+
1563+ @ Override
1564+ public void assertMatched () {
1565+ assertFalse (matched );
1566+ }
1567+ });
1568+
1569+ cluster .runFor (
1570+ // This expects 8 tasks to be executed after PeerFinder handling wakeup:
1571+ //
1572+ // * connectToRemoteMasterNode[0.0.0.0:11]
1573+ // * [internal:transport/handshake] from {node1} to {node2}
1574+ // * response to [internal:transport/handshake] from {node1} to {node2}
1575+ // * [internal:discovery/request_peers] from {node1} to
1576+ // * response to [internal:discovery/request_peers] from {node1} to {node2}
1577+ // * [internal:cluster/coordination/join] from {node1} to {node2}
1578+ // * [internal:transport/handshake] from {node2} to {node1} (rejected due to action block)
1579+ // * error response to [internal:cluster/coordination/join] from {node1} to {node2}
1580+ //
1581+ defaultMillis (DISCOVERY_FIND_PEERS_INTERVAL_SETTING ) + 8 * DEFAULT_DELAY_VARIABILITY ,
1582+ "allowing time for join attempt"
1583+ );
1584+ mockLog .assertAllExpectationsMatched ();
1585+ }
1586+
1587+ leader .clearActionBlocks ();
1588+ }
1589+ }
1590+
14771591 public void testFollowerRemovedIfUnableToSendRequestsToMaster () {
14781592 try (Cluster cluster = new Cluster (3 )) {
14791593 cluster .runRandomly ();
0 commit comments