Skip to content

Commit 7bd2b80

Browse files
authored
Remove dependency on cluster state API in SpecificMasterNodesIT (#127213)
These tests depended on the cluster state API to wait for the master node. This behavior is being removed, so we switch to alternative approaches of waiting for the master node. Relates #127212
1 parent 3ecb991 commit 7bd2b80

File tree

3 files changed

+69
-197
lines changed

3 files changed

+69
-197
lines changed

server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java

Lines changed: 20 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -13,248 +13,73 @@
1313
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
1414
import org.elasticsearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction;
1515
import org.elasticsearch.common.settings.Settings;
16-
import org.elasticsearch.core.TimeValue;
17-
import org.elasticsearch.discovery.MasterNotDiscoveredException;
1816
import org.elasticsearch.index.query.QueryBuilders;
1917
import org.elasticsearch.test.ESIntegTestCase;
2018
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
2119
import org.elasticsearch.test.ESIntegTestCase.Scope;
2220

23-
import java.io.IOException;
24-
2521
import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
2622
import static org.elasticsearch.test.NodeRoles.masterNode;
2723
import static org.elasticsearch.test.NodeRoles.nonDataNode;
2824
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
29-
import static org.hamcrest.Matchers.equalTo;
30-
import static org.hamcrest.Matchers.nullValue;
3125

3226
@ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
3327
public class SpecificMasterNodesIT extends ESIntegTestCase {
3428

35-
public void testSimpleOnlyMasterNodeElection() throws IOException {
29+
public void testSimpleOnlyMasterNodeElection() throws Exception {
3630
internalCluster().setBootstrapMasterNodeIndex(0);
3731
logger.info("--> start data node / non master node");
3832
internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
39-
try {
40-
assertThat(
41-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
42-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
43-
.get()
44-
.getState()
45-
.nodes()
46-
.getMasterNodeId(),
47-
nullValue()
48-
);
49-
fail("should not be able to find master");
50-
} catch (MasterNotDiscoveredException e) {
51-
// all is well, no master elected
52-
}
33+
awaitMasterNotFound();
34+
5335
logger.info("--> start master node");
5436
final String masterNodeName = internalCluster().startMasterOnlyNode();
55-
assertThat(
56-
internalCluster().nonMasterClient()
57-
.admin()
58-
.cluster()
59-
.prepareState(TEST_REQUEST_TIMEOUT)
60-
.get()
61-
.getState()
62-
.nodes()
63-
.getMasterNode()
64-
.getName(),
65-
equalTo(masterNodeName)
66-
);
67-
assertThat(
68-
internalCluster().masterClient()
69-
.admin()
70-
.cluster()
71-
.prepareState(TEST_REQUEST_TIMEOUT)
72-
.get()
73-
.getState()
74-
.nodes()
75-
.getMasterNode()
76-
.getName(),
77-
equalTo(masterNodeName)
78-
);
37+
38+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
39+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
7940

8041
logger.info("--> stop master node");
8142
Settings masterDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName());
8243
internalCluster().stopCurrentMasterNode();
8344

84-
try {
85-
assertThat(
86-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
87-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
88-
.get()
89-
.getState()
90-
.nodes()
91-
.getMasterNodeId(),
92-
nullValue()
93-
);
94-
fail("should not be able to find master");
95-
} catch (MasterNotDiscoveredException e) {
96-
// all is well, no master elected
97-
}
45+
awaitMasterNotFound();
9846

9947
logger.info("--> start previous master node again");
10048
final String nextMasterEligibleNodeName = internalCluster().startNode(
10149
Settings.builder().put(nonDataNode(masterNode())).put(masterDataPathSettings)
10250
);
103-
assertThat(
104-
internalCluster().nonMasterClient()
105-
.admin()
106-
.cluster()
107-
.prepareState(TEST_REQUEST_TIMEOUT)
108-
.get()
109-
.getState()
110-
.nodes()
111-
.getMasterNode()
112-
.getName(),
113-
equalTo(nextMasterEligibleNodeName)
114-
);
115-
assertThat(
116-
internalCluster().masterClient()
117-
.admin()
118-
.cluster()
119-
.prepareState(TEST_REQUEST_TIMEOUT)
120-
.get()
121-
.getState()
122-
.nodes()
123-
.getMasterNode()
124-
.getName(),
125-
equalTo(nextMasterEligibleNodeName)
126-
);
51+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligibleNodeName);
52+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligibleNodeName);
12753
}
12854

12955
public void testElectOnlyBetweenMasterNodes() throws Exception {
13056
internalCluster().setBootstrapMasterNodeIndex(0);
13157
logger.info("--> start data node / non master node");
13258
internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
133-
try {
134-
assertThat(
135-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
136-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
137-
.get()
138-
.getState()
139-
.nodes()
140-
.getMasterNodeId(),
141-
nullValue()
142-
);
143-
fail("should not be able to find master");
144-
} catch (MasterNotDiscoveredException e) {
145-
// all is well, no master elected
146-
}
59+
awaitMasterNotFound();
60+
14761
logger.info("--> start master node (1)");
14862
final String masterNodeName = internalCluster().startMasterOnlyNode();
149-
assertThat(
150-
internalCluster().nonMasterClient()
151-
.admin()
152-
.cluster()
153-
.prepareState(TEST_REQUEST_TIMEOUT)
154-
.get()
155-
.getState()
156-
.nodes()
157-
.getMasterNode()
158-
.getName(),
159-
equalTo(masterNodeName)
160-
);
161-
assertThat(
162-
internalCluster().masterClient()
163-
.admin()
164-
.cluster()
165-
.prepareState(TEST_REQUEST_TIMEOUT)
166-
.get()
167-
.getState()
168-
.nodes()
169-
.getMasterNode()
170-
.getName(),
171-
equalTo(masterNodeName)
172-
);
63+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
64+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
17365

17466
logger.info("--> start master node (2)");
17567
final String nextMasterEligableNodeName = internalCluster().startMasterOnlyNode();
176-
assertThat(
177-
internalCluster().nonMasterClient()
178-
.admin()
179-
.cluster()
180-
.prepareState(TEST_REQUEST_TIMEOUT)
181-
.get()
182-
.getState()
183-
.nodes()
184-
.getMasterNode()
185-
.getName(),
186-
equalTo(masterNodeName)
187-
);
188-
assertThat(
189-
internalCluster().masterClient()
190-
.admin()
191-
.cluster()
192-
.prepareState(TEST_REQUEST_TIMEOUT)
193-
.get()
194-
.getState()
195-
.nodes()
196-
.getMasterNode()
197-
.getName(),
198-
equalTo(masterNodeName)
199-
);
68+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
69+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
20070

20171
logger.info("--> closing master node (1)");
20272
client().execute(
20373
TransportAddVotingConfigExclusionsAction.TYPE,
20474
new AddVotingConfigExclusionsRequest(TEST_REQUEST_TIMEOUT, masterNodeName)
20575
).get();
20676
// removing the master from the voting configuration immediately triggers the master to step down
207-
assertBusy(() -> {
208-
assertThat(
209-
internalCluster().nonMasterClient()
210-
.admin()
211-
.cluster()
212-
.prepareState(TEST_REQUEST_TIMEOUT)
213-
.get()
214-
.getState()
215-
.nodes()
216-
.getMasterNode()
217-
.getName(),
218-
equalTo(nextMasterEligableNodeName)
219-
);
220-
assertThat(
221-
internalCluster().masterClient()
222-
.admin()
223-
.cluster()
224-
.prepareState(TEST_REQUEST_TIMEOUT)
225-
.get()
226-
.getState()
227-
.nodes()
228-
.getMasterNode()
229-
.getName(),
230-
equalTo(nextMasterEligableNodeName)
231-
);
232-
});
77+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName);
78+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName);
79+
23380
internalCluster().stopNode(masterNodeName);
234-
assertThat(
235-
internalCluster().nonMasterClient()
236-
.admin()
237-
.cluster()
238-
.prepareState(TEST_REQUEST_TIMEOUT)
239-
.get()
240-
.getState()
241-
.nodes()
242-
.getMasterNode()
243-
.getName(),
244-
equalTo(nextMasterEligableNodeName)
245-
);
246-
assertThat(
247-
internalCluster().masterClient()
248-
.admin()
249-
.cluster()
250-
.prepareState(TEST_REQUEST_TIMEOUT)
251-
.get()
252-
.getState()
253-
.nodes()
254-
.getMasterNode()
255-
.getName(),
256-
equalTo(nextMasterEligableNodeName)
257-
);
81+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName);
82+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName);
25883
}
25984

26085
public void testAliasFilterValidation() {

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@
198198
import java.util.List;
199199
import java.util.Locale;
200200
import java.util.Map;
201+
import java.util.Optional;
201202
import java.util.Random;
202203
import java.util.Set;
203204
import java.util.concurrent.Callable;
@@ -940,6 +941,39 @@ public void waitNoPendingTasksOnAll() throws Exception {
940941
assertNoTimeout(clusterAdmin().prepareHealth(TEST_REQUEST_TIMEOUT).setWaitForEvents(Priority.LANGUID).get());
941942
}
942943

944+
/**
945+
* Waits for the node {@code viaNode} to see {@code masterNodeName} as the master node in the cluster state.
946+
* Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already.
947+
*
948+
* @param viaNode the node to check the cluster state on
949+
* @param masterNodeName the master node name that we wait for
950+
*/
951+
public void awaitMasterNode(String viaNode, String masterNodeName) {
952+
var listener = ClusterServiceUtils.addTemporaryStateListener(
953+
internalCluster().clusterService(viaNode),
954+
state -> Optional.ofNullable(state.nodes().getMasterNode()).map(m -> m.getName().equals(masterNodeName)).orElse(false),
955+
TEST_REQUEST_TIMEOUT
956+
);
957+
safeAwait(listener, TEST_REQUEST_TIMEOUT);
958+
}
959+
960+
/**
961+
* Waits for a random node in the cluster to not see a master node in the cluster state.
962+
* Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already.
963+
*/
964+
public void awaitMasterNotFound() {
965+
var viaNode = internalCluster().getRandomNodeName();
966+
// We use a temporary state listener instead of `awaitClusterState` here because the `ClusterStateObserver` doesn't run the
967+
// predicate if the cluster state version didn't change. When a master node leaves the cluster (i.e. what this method is used for),
968+
// the cluster state version is not incremented.
969+
var listener = ClusterServiceUtils.addTemporaryStateListener(
970+
internalCluster().clusterService(viaNode),
971+
state -> state.nodes().getMasterNode() == null,
972+
TEST_REQUEST_TIMEOUT
973+
);
974+
safeAwait(listener, TEST_REQUEST_TIMEOUT);
975+
}
976+
943977
/** Ensures the result counts are as expected, and logs the results if different */
944978
public void assertResultsAndLogOnFailure(long expectedResults, SearchResponse searchResponse) {
945979
final TotalHits totalHits = searchResponse.getHits().getTotalHits();

test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,15 +2033,28 @@ public String getMasterName() {
20332033
* in the viaNode parameter. If viaNode isn't specified, a random node will be picked to send the request to.
20342034
*/
20352035
public String getMasterName(@Nullable String viaNode) {
2036+
viaNode = viaNode != null ? viaNode : getRandomNodeName();
2037+
if (viaNode == null) {
2038+
throw new AssertionError("Unable to get master name, no node found");
2039+
}
20362040
try {
2037-
Client client = viaNode != null ? client(viaNode) : client();
2038-
return client.admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode().getName();
2041+
ClusterServiceUtils.awaitClusterState(logger, state -> state.nodes().getMasterNode() != null, clusterService(viaNode));
2042+
final ClusterState state = client(viaNode).admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).setLocal(true).get().getState();
2043+
return state.nodes().getMasterNode().getName();
20392044
} catch (Exception e) {
20402045
logger.warn("Can't fetch cluster state", e);
20412046
throw new RuntimeException("Can't get master node " + e.getMessage(), e);
20422047
}
20432048
}
20442049

2050+
public String getNonMasterNodeName() {
2051+
NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new NodeNamePredicate(getMasterName()).negate());
2052+
if (randomNodeAndClient != null) {
2053+
return randomNodeAndClient.getName();
2054+
}
2055+
throw new AssertionError("No non-master node found");
2056+
}
2057+
20452058
/**
20462059
* @return the name of a random node in a cluster
20472060
*/

0 commit comments

Comments
 (0)