@@ -1474,6 +1474,120 @@ public void assertMatched() {
1474
1474
}
1475
1475
}
1476
1476
1477
+ // TODO:
1478
+ // 1: Refactor this test and above
1479
+ // 2: Add a test where it fails to begin with, then the handshake block is removed and it succeeds
1480
+ @ TestLogging (
1481
+ reason = "test includes assertions about logging" ,
1482
+ value = "org.elasticsearch.cluster.coordination.Coordinator:WARN,org.elasticsearch.cluster.coordination.JoinHelper:INFO"
1483
+ )
1484
+ public void testDoesNotReportConnectBackProblemsDuringJoiningIfNodeIsInClusterState () throws Exception {
1485
+ try (var cluster = new Cluster (3 )) {
1486
+ cluster .runRandomly ();
1487
+ cluster .stabilise ();
1488
+
1489
+ final var partitionedNode = cluster .getAnyNode ();
1490
+ partitionedNode .disconnect ();
1491
+ cluster .stabilise ();
1492
+
1493
+ logger .info ("--> removed [{}] but adding to master's cluster state" , partitionedNode );
1494
+ final ClusterNode leader = cluster .getAnyLeader ();
1495
+ leader .submitUpdateTask ("updating cluster state" ,
1496
+ cs -> {
1497
+ ClusterState cs2 = ClusterState .builder (cs )
1498
+ .nodes (DiscoveryNodes .builder (cs .nodes ())
1499
+ .add (partitionedNode .getLocalNode ())
1500
+ .build ())
1501
+ .build ();
1502
+ // Insert breakpoint here
1503
+ return cs2 ;
1504
+ },
1505
+ (e ) -> {}
1506
+ );
1507
+
1508
+ logger .info ("--> healing [{}] but blocking handshakes" , partitionedNode );
1509
+ partitionedNode .heal ();
1510
+ leader .addActionBlock (TransportService .HANDSHAKE_ACTION_NAME );
1511
+
1512
+ try (var mockLog = MockLog .capture (Coordinator .class , JoinHelper .class )) {
1513
+
1514
+ // Since the node is in the cluster state, we do not expect this log
1515
+ mockLog .addExpectation (
1516
+ new MockLog .UnseenEventExpectation (
1517
+ "connect-back failure" ,
1518
+ Coordinator .class .getCanonicalName (),
1519
+ Level .WARN ,
1520
+ "*received join request from ["
1521
+ + partitionedNode .getLocalNode ().descriptionWithoutAttributes ()
1522
+ + "] but could not connect back to the joining node"
1523
+ )
1524
+ );
1525
+
1526
+ // We do not expect an info log from JoinHelper about the handshake failure
1527
+ mockLog .addExpectation (new MockLog .LoggingExpectation () {
1528
+ boolean matched = false ;
1529
+
1530
+ @ Override
1531
+ public void match (LogEvent event ) {
1532
+ if (event .getLevel () != Level .INFO ) {
1533
+ return ;
1534
+ }
1535
+ if (event .getLoggerName ().equals (JoinHelper .class .getCanonicalName ()) == false ) {
1536
+ return ;
1537
+ }
1538
+
1539
+ var cause = event .getThrown ();
1540
+ if (cause == null ) {
1541
+ return ;
1542
+ }
1543
+ cause = cause .getCause ();
1544
+ if (cause == null ) {
1545
+ return ;
1546
+ }
1547
+ if (Regex .simpleMatch (
1548
+ "* failure when opening connection back from ["
1549
+ + leader .getLocalNode ().descriptionWithoutAttributes ()
1550
+ + "] to ["
1551
+ + partitionedNode .getLocalNode ().descriptionWithoutAttributes ()
1552
+ + "]" ,
1553
+ cause .getMessage ()
1554
+ ) == false ) {
1555
+ return ;
1556
+ }
1557
+ if (cause .getStackTrace () != null && cause .getStackTrace ().length != 0 ) {
1558
+ return ;
1559
+ }
1560
+ matched = true ;
1561
+ }
1562
+
1563
+ @ Override
1564
+ public void assertMatched () {
1565
+ assertFalse (matched );
1566
+ }
1567
+ });
1568
+
1569
+ cluster .runFor (
1570
+ // This expects 8 tasks to be executed after PeerFinder handling wakeup:
1571
+ //
1572
+ // * connectToRemoteMasterNode[0.0.0.0:11]
1573
+ // * [internal:transport/handshake] from {node1} to {node2}
1574
+ // * response to [internal:transport/handshake] from {node1} to {node2}
1575
+ // * [internal:discovery/request_peers] from {node1} to
1576
+ // * response to [internal:discovery/request_peers] from {node1} to {node2}
1577
+ // * [internal:cluster/coordination/join] from {node1} to {node2}
1578
+ // * [internal:transport/handshake] from {node2} to {node1} (rejected due to action block)
1579
+ // * error response to [internal:cluster/coordination/join] from {node1} to {node2}
1580
+ //
1581
+ defaultMillis (DISCOVERY_FIND_PEERS_INTERVAL_SETTING ) + 8 * DEFAULT_DELAY_VARIABILITY ,
1582
+ "allowing time for join attempt"
1583
+ );
1584
+ mockLog .assertAllExpectationsMatched ();
1585
+ }
1586
+
1587
+ leader .clearActionBlocks ();
1588
+ }
1589
+ }
1590
+
1477
1591
public void testFollowerRemovedIfUnableToSendRequestsToMaster () {
1478
1592
try (Cluster cluster = new Cluster (3 )) {
1479
1593
cluster .runRandomly ();
0 commit comments