From 8018a91c612bcb95ce6fbcb8cc2c2862adc09ecd Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 10 Jul 2025 13:10:35 +1000 Subject: [PATCH 1/5] [Test] Wait for stable cluster before assertion Nodes that are not publish quorum may not have applied the latest update when awaitMasterNode returns. This PR fixes it by waiting for the desired number of nodes. Relates: #129118 Resolves: #130883 --- muted-tests.yml | 3 --- .../main/java/org/elasticsearch/test/ESIntegTestCase.java | 7 ++++--- .../coordination/votingonly/VotingOnlyNodePluginTests.java | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 32fcf5a1f3d26..45a2c0d3323e0 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -564,9 +564,6 @@ tests: - class: org.elasticsearch.cluster.ClusterStateSerializationTests method: testSerializationPreMultiProject issue: https://github.com/elastic/elasticsearch/issues/130872 -- class: org.elasticsearch.cluster.coordination.votingonly.VotingOnlyNodePluginTests - method: testPreferFullMasterOverVotingOnlyNodes - issue: https://github.com/elastic/elasticsearch/issues/130883 # Examples: # diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index f0bdbe5ced329..4d532c8fc30ea 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -965,12 +965,13 @@ public void awaitMasterNode(String viaNode, String masterNodeName) { } /** - * Waits for all nodes in the cluster to have a consistent view of which node is currently the master. + * Waits for all nodes forming the publish quorum in the cluster to have a consistent view of which node is currently the master. */ public void awaitMasterNode() { // The cluster health API always runs on the master node, and the master only completes cluster state publication when all nodes - // in the cluster have accepted the new cluster state. By waiting for all events to have finished on the master node, we ensure - // that the whole cluster has a consistent view of which node is the master. + // forming the publish quorum have accepted the new cluster state. By waiting for all events to have finished on the + // master node, we ensure that all nodes that are part of the publish quorum have a consistent view of which node is the master. + // But nodes that are _not_ part of the quorum might still not have the same view yet. clusterAdmin().prepareHealth(TEST_REQUEST_TIMEOUT).setTimeout(TEST_REQUEST_TIMEOUT).setWaitForEvents(Priority.LANGUID).get(); } diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index 92297f7585128..513358c89d5a0 100644 --- a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -96,7 +96,8 @@ public void testPreferFullMasterOverVotingOnlyNodes() throws Exception { internalCluster().setBootstrapMasterNodeIndex(0); internalCluster().startNodes(2); internalCluster().startNode(addRoles(Set.of(DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE))); - internalCluster().startDataOnlyNodes(randomInt(2)); + final int numDataNodes = randomInt(2); + internalCluster().startDataOnlyNodes(numDataNodes); assertBusy( () -> assertThat( clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().getLastCommittedConfiguration().getNodeIds().size(), @@ -108,6 +109,7 @@ public void testPreferFullMasterOverVotingOnlyNodes() throws Exception { internalCluster().stopCurrentMasterNode(); awaitMasterNode(); assertNotEquals(originalMaster, internalCluster().getMasterName()); + ensureStableCluster(2 + numDataNodes); // wait for all nodes to join assertThat( VotingOnlyNodePlugin.isVotingOnlyNode( clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode() From 5445ef6500875f174b55b5daa9795d4e412bb792 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 10 Jul 2025 18:19:39 +1000 Subject: [PATCH 2/5] revert changes to comment --- .../main/java/org/elasticsearch/test/ESIntegTestCase.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 4d532c8fc30ea..f0bdbe5ced329 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -965,13 +965,12 @@ public void awaitMasterNode(String viaNode, String masterNodeName) { } /** - * Waits for all nodes forming the publish quorum in the cluster to have a consistent view of which node is currently the master. + * Waits for all nodes in the cluster to have a consistent view of which node is currently the master. */ public void awaitMasterNode() { // The cluster health API always runs on the master node, and the master only completes cluster state publication when all nodes - // forming the publish quorum have accepted the new cluster state. By waiting for all events to have finished on the - // master node, we ensure that all nodes that are part of the publish quorum have a consistent view of which node is the master. - // But nodes that are _not_ part of the quorum might still not have the same view yet. + // in the cluster have accepted the new cluster state. By waiting for all events to have finished on the master node, we ensure + // that the whole cluster has a consistent view of which node is the master. clusterAdmin().prepareHealth(TEST_REQUEST_TIMEOUT).setTimeout(TEST_REQUEST_TIMEOUT).setWaitForEvents(Priority.LANGUID).get(); } From ce1452cbb1956a9926883b6110c6867f279aa571 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 10 Jul 2025 18:31:00 +1000 Subject: [PATCH 3/5] feedback --- .../votingonly/VotingOnlyNodePluginTests.java | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index 513358c89d5a0..ed3ac9eb99b42 100644 --- a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; @@ -98,24 +99,19 @@ public void testPreferFullMasterOverVotingOnlyNodes() throws Exception { internalCluster().startNode(addRoles(Set.of(DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE))); final int numDataNodes = randomInt(2); internalCluster().startDataOnlyNodes(numDataNodes); - assertBusy( - () -> assertThat( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().getLastCommittedConfiguration().getNodeIds().size(), - equalTo(3) - ) + internalCluster().validateClusterFormed(); + + awaitClusterState( + state -> state.getLastCommittedConfiguration().getNodeIds().size() == 3 && state.nodes().size() == 3 + numDataNodes ); final String originalMaster = internalCluster().getMasterName(); internalCluster().stopCurrentMasterNode(); - awaitMasterNode(); + internalCluster().validateClusterFormed(); assertNotEquals(originalMaster, internalCluster().getMasterName()); - ensureStableCluster(2 + numDataNodes); // wait for all nodes to join - assertThat( - VotingOnlyNodePlugin.isVotingOnlyNode( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode() - ), - equalTo(false) - ); + final ClusterState state = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState(); + assertThat(state.nodes().size(), equalTo(2 + numDataNodes)); + assertThat(VotingOnlyNodePlugin.isVotingOnlyNode(state.nodes().getMasterNode()), equalTo(false)); } public void testBootstrapOnlyVotingOnlyNodes() throws Exception { @@ -159,21 +155,21 @@ public void testVotingOnlyNodesCannotBeMasterWithoutFullMasterNodes() throws Exc internalCluster().setBootstrapMasterNodeIndex(0); internalCluster().startNode(); internalCluster().startNodes(2, addRoles(Set.of(DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE))); - internalCluster().startDataOnlyNodes(randomInt(2)); - assertBusy( - () -> assertThat( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().getLastCommittedConfiguration().getNodeIds().size(), - equalTo(3) - ) + final int numDataNodes = randomInt(2); + internalCluster().startDataOnlyNodes(numDataNodes); + internalCluster().validateClusterFormed(); + + awaitClusterState( + state -> state.getLastCommittedConfiguration().getNodeIds().size() == 3 && state.nodes().size() == 3 + numDataNodes ); - awaitMasterNode(); - final String oldMasterId = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNodeId(); + final String oldMasterId = internalCluster().getMasterName(); internalCluster().stopCurrentMasterNode(); awaitMasterNotFound(); // start a fresh full master node, which will be brought into the cluster as master by the voting-only nodes final String newMaster = internalCluster().startNode(); + internalCluster().validateClusterFormed(); assertEquals(newMaster, internalCluster().getMasterName()); final String newMasterId = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNodeId(); assertNotEquals(oldMasterId, newMasterId); From c0391746e926b41f9455411838926eebf80d0ae0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 10 Jul 2025 19:18:40 +1000 Subject: [PATCH 4/5] feedback --- .../votingonly/VotingOnlyNodePluginTests.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index ed3ac9eb99b42..4b66b649c8c02 100644 --- a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -107,11 +107,14 @@ public void testPreferFullMasterOverVotingOnlyNodes() throws Exception { final String originalMaster = internalCluster().getMasterName(); internalCluster().stopCurrentMasterNode(); - internalCluster().validateClusterFormed(); + awaitMasterNode(); assertNotEquals(originalMaster, internalCluster().getMasterName()); - final ClusterState state = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState(); - assertThat(state.nodes().size(), equalTo(2 + numDataNodes)); - assertThat(VotingOnlyNodePlugin.isVotingOnlyNode(state.nodes().getMasterNode()), equalTo(false)); + assertThat( + VotingOnlyNodePlugin.isVotingOnlyNode( + clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode() + ), + equalTo(false) + ); } public void testBootstrapOnlyVotingOnlyNodes() throws Exception { @@ -169,7 +172,6 @@ public void testVotingOnlyNodesCannotBeMasterWithoutFullMasterNodes() throws Exc // start a fresh full master node, which will be brought into the cluster as master by the voting-only nodes final String newMaster = internalCluster().startNode(); - internalCluster().validateClusterFormed(); assertEquals(newMaster, internalCluster().getMasterName()); final String newMasterId = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNodeId(); assertNotEquals(oldMasterId, newMasterId); From 5ded2bd70b3862385629781d2d2798eea21e8553 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 10 Jul 2025 09:38:31 +0000 Subject: [PATCH 5/5] [CI] Auto commit changes from spotless --- .../coordination/votingonly/VotingOnlyNodePluginTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index 4b66b649c8c02..1175b6b7ea299 100644 --- a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.elasticsearch.client.internal.Client; -import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode;