diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java index 222c7fc3e1f14..5f86111d352a9 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java @@ -13,190 +13,60 @@ import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; import org.elasticsearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.discovery.MasterNotDiscoveredException; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; -import java.io.IOException; - import static org.elasticsearch.test.NodeRoles.dataOnlyNode; import static org.elasticsearch.test.NodeRoles.masterNode; import static org.elasticsearch.test.NodeRoles.nonDataNode; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; @ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) public class SpecificMasterNodesIT extends ESIntegTestCase { - public void testSimpleOnlyMasterNodeElection() throws IOException { + public void testSimpleOnlyMasterNodeElection() throws Exception { internalCluster().setBootstrapMasterNodeIndex(0); logger.info("--> start data node / non master node"); internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s")); - try { - assertThat( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT) - 
.setMasterNodeTimeout(TimeValue.timeValueMillis(100)) - .get() - .getState() - .nodes() - .getMasterNodeId(), - nullValue() - ); - fail("should not be able to find master"); - } catch (MasterNotDiscoveredException e) { - // all is well, no master elected - } + awaitMasterNotFound(); + logger.info("--> start master node"); final String masterNodeName = internalCluster().startMasterOnlyNode(); - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); - assertThat( - internalCluster().masterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); + + awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName); + awaitMasterNode(internalCluster().getMasterName(), masterNodeName); logger.info("--> stop master node"); Settings masterDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName()); internalCluster().stopCurrentMasterNode(); - try { - assertThat( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT) - .setMasterNodeTimeout(TimeValue.timeValueMillis(100)) - .get() - .getState() - .nodes() - .getMasterNodeId(), - nullValue() - ); - fail("should not be able to find master"); - } catch (MasterNotDiscoveredException e) { - // all is well, no master elected - } + awaitMasterNotFound(); logger.info("--> start previous master node again"); final String nextMasterEligibleNodeName = internalCluster().startNode( Settings.builder().put(nonDataNode(masterNode())).put(masterDataPathSettings) ); - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligibleNodeName) - ); - assertThat( - internalCluster().masterClient() - .admin() 
- .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligibleNodeName) - ); + awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligibleNodeName); + awaitMasterNode(internalCluster().getMasterName(), nextMasterEligibleNodeName); } public void testElectOnlyBetweenMasterNodes() throws Exception { internalCluster().setBootstrapMasterNodeIndex(0); logger.info("--> start data node / non master node"); internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s")); - try { - assertThat( - clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT) - .setMasterNodeTimeout(TimeValue.timeValueMillis(100)) - .get() - .getState() - .nodes() - .getMasterNodeId(), - nullValue() - ); - fail("should not be able to find master"); - } catch (MasterNotDiscoveredException e) { - // all is well, no master elected - } + awaitMasterNotFound(); + logger.info("--> start master node (1)"); final String masterNodeName = internalCluster().startMasterOnlyNode(); - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); - assertThat( - internalCluster().masterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); + awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName); + awaitMasterNode(internalCluster().getMasterName(), masterNodeName); logger.info("--> start master node (2)"); final String nextMasterEligableNodeName = internalCluster().startMasterOnlyNode(); - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); - 
assertThat( - internalCluster().masterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(masterNodeName) - ); + awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName); + awaitMasterNode(internalCluster().getMasterName(), masterNodeName); logger.info("--> closing master node (1)"); client().execute( @@ -204,57 +74,12 @@ public void testElectOnlyBetweenMasterNodes() throws Exception { new AddVotingConfigExclusionsRequest(TEST_REQUEST_TIMEOUT, masterNodeName) ).get(); // removing the master from the voting configuration immediately triggers the master to step down - assertBusy(() -> { - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligableNodeName) - ); - assertThat( - internalCluster().masterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligableNodeName) - ); - }); + awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName); + awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName); + internalCluster().stopNode(masterNodeName); - assertThat( - internalCluster().nonMasterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligableNodeName) - ); - assertThat( - internalCluster().masterClient() - .admin() - .cluster() - .prepareState(TEST_REQUEST_TIMEOUT) - .get() - .getState() - .nodes() - .getMasterNode() - .getName(), - equalTo(nextMasterEligableNodeName) - ); + awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName); + awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName); } public 
void testAliasFilterValidation() { diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index d3f906eda9f32..c4bc5adfce72a 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -198,6 +198,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Random; import java.util.Set; import java.util.concurrent.Callable; @@ -940,6 +941,39 @@ public void waitNoPendingTasksOnAll() throws Exception { assertNoTimeout(clusterAdmin().prepareHealth(TEST_REQUEST_TIMEOUT).setWaitForEvents(Priority.LANGUID).get()); } + /** + * Waits for the node {@code viaNode} to see {@code masterNodeName} as the master node in the cluster state. + * Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already. + * + * @param viaNode the node to check the cluster state on + * @param masterNodeName the master node name that we wait for + */ + public void awaitMasterNode(String viaNode, String masterNodeName) { + var listener = ClusterServiceUtils.addTemporaryStateListener( + internalCluster().clusterService(viaNode), + state -> Optional.ofNullable(state.nodes().getMasterNode()).map(m -> m.getName().equals(masterNodeName)).orElse(false), + TEST_REQUEST_TIMEOUT + ); + safeAwait(listener, TEST_REQUEST_TIMEOUT); + } + + /** + * Waits for a random node in the cluster to not see a master node in the cluster state. + * Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already. 
+ */ + public void awaitMasterNotFound() { + var viaNode = internalCluster().getRandomNodeName(); + // We use a temporary state listener instead of `awaitClusterState` here because the `ClusterStateObserver` doesn't run the + // predicate if the cluster state version didn't change. When a master node leaves the cluster (i.e. what this method is used for), + // the cluster state version is not incremented. + var listener = ClusterServiceUtils.addTemporaryStateListener( + internalCluster().clusterService(viaNode), + state -> state.nodes().getMasterNode() == null, + TEST_REQUEST_TIMEOUT + ); + safeAwait(listener, TEST_REQUEST_TIMEOUT); + } + /** Ensures the result counts are as expected, and logs the results if different */ public void assertResultsAndLogOnFailure(long expectedResults, SearchResponse searchResponse) { final TotalHits totalHits = searchResponse.getHits().getTotalHits(); diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index b4f278a7fbc7c..e00ae7abaf0eb 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -2033,15 +2033,28 @@ public String getMasterName() { * in the viaNode parameter. If viaNode isn't specified a random node will be picked to the send the request to. */ public String getMasterName(@Nullable String viaNode) { + viaNode = viaNode != null ? viaNode : getRandomNodeName(); + if (viaNode == null) { + throw new AssertionError("Unable to get master name, no node found"); + } try { - Client client = viaNode != null ? 
client(viaNode) : client(); - return client.admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode().getName(); + ClusterServiceUtils.awaitClusterState(logger, state -> state.nodes().getMasterNode() != null, clusterService(viaNode)); + final ClusterState state = client(viaNode).admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).setLocal(true).get().getState(); + return state.nodes().getMasterNode().getName(); } catch (Exception e) { logger.warn("Can't fetch cluster state", e); throw new RuntimeException("Can't get master node " + e.getMessage(), e); } } + public String getNonMasterNodeName() { + NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new NodeNamePredicate(getMasterName()).negate()); + if (randomNodeAndClient != null) { + return randomNodeAndClient.getName(); + } + throw new AssertionError("No non-master node found"); + } + /** * @return the name of a random node in a cluster */