Skip to content

Commit 9c81a71

Browse files
committed
Merge remote-tracking branch 'origin/main' into non-issue/ES-7879-translog-guide
2 parents f4a3aa8 + 7bd2b80 commit 9c81a71

File tree

4 files changed

+75
-197
lines changed

4 files changed

+75
-197
lines changed

server/src/internalClusterTest/java/org/elasticsearch/cluster/SpecificMasterNodesIT.java

Lines changed: 20 additions & 195 deletions
Original file line number | Diff line number | Diff line change
@@ -13,248 +13,73 @@
1313
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
1414
import org.elasticsearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction;
1515
import org.elasticsearch.common.settings.Settings;
16-
import org.elasticsearch.core.TimeValue;
17-
import org.elasticsearch.discovery.MasterNotDiscoveredException;
1816
import org.elasticsearch.index.query.QueryBuilders;
1917
import org.elasticsearch.test.ESIntegTestCase;
2018
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
2119
import org.elasticsearch.test.ESIntegTestCase.Scope;
2220

23-
import java.io.IOException;
24-
2521
import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
2622
import static org.elasticsearch.test.NodeRoles.masterNode;
2723
import static org.elasticsearch.test.NodeRoles.nonDataNode;
2824
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
29-
import static org.hamcrest.Matchers.equalTo;
30-
import static org.hamcrest.Matchers.nullValue;
3125

3226
@ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
3327
public class SpecificMasterNodesIT extends ESIntegTestCase {
3428

35-
public void testSimpleOnlyMasterNodeElection() throws IOException {
29+
public void testSimpleOnlyMasterNodeElection() throws Exception {
3630
internalCluster().setBootstrapMasterNodeIndex(0);
3731
logger.info("--> start data node / non master node");
3832
internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
39-
try {
40-
assertThat(
41-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
42-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
43-
.get()
44-
.getState()
45-
.nodes()
46-
.getMasterNodeId(),
47-
nullValue()
48-
);
49-
fail("should not be able to find master");
50-
} catch (MasterNotDiscoveredException e) {
51-
// all is well, no master elected
52-
}
33+
awaitMasterNotFound();
34+
5335
logger.info("--> start master node");
5436
final String masterNodeName = internalCluster().startMasterOnlyNode();
55-
assertThat(
56-
internalCluster().nonMasterClient()
57-
.admin()
58-
.cluster()
59-
.prepareState(TEST_REQUEST_TIMEOUT)
60-
.get()
61-
.getState()
62-
.nodes()
63-
.getMasterNode()
64-
.getName(),
65-
equalTo(masterNodeName)
66-
);
67-
assertThat(
68-
internalCluster().masterClient()
69-
.admin()
70-
.cluster()
71-
.prepareState(TEST_REQUEST_TIMEOUT)
72-
.get()
73-
.getState()
74-
.nodes()
75-
.getMasterNode()
76-
.getName(),
77-
equalTo(masterNodeName)
78-
);
37+
38+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
39+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
7940

8041
logger.info("--> stop master node");
8142
Settings masterDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName());
8243
internalCluster().stopCurrentMasterNode();
8344

84-
try {
85-
assertThat(
86-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
87-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
88-
.get()
89-
.getState()
90-
.nodes()
91-
.getMasterNodeId(),
92-
nullValue()
93-
);
94-
fail("should not be able to find master");
95-
} catch (MasterNotDiscoveredException e) {
96-
// all is well, no master elected
97-
}
45+
awaitMasterNotFound();
9846

9947
logger.info("--> start previous master node again");
10048
final String nextMasterEligibleNodeName = internalCluster().startNode(
10149
Settings.builder().put(nonDataNode(masterNode())).put(masterDataPathSettings)
10250
);
103-
assertThat(
104-
internalCluster().nonMasterClient()
105-
.admin()
106-
.cluster()
107-
.prepareState(TEST_REQUEST_TIMEOUT)
108-
.get()
109-
.getState()
110-
.nodes()
111-
.getMasterNode()
112-
.getName(),
113-
equalTo(nextMasterEligibleNodeName)
114-
);
115-
assertThat(
116-
internalCluster().masterClient()
117-
.admin()
118-
.cluster()
119-
.prepareState(TEST_REQUEST_TIMEOUT)
120-
.get()
121-
.getState()
122-
.nodes()
123-
.getMasterNode()
124-
.getName(),
125-
equalTo(nextMasterEligibleNodeName)
126-
);
51+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligibleNodeName);
52+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligibleNodeName);
12753
}
12854

12955
public void testElectOnlyBetweenMasterNodes() throws Exception {
13056
internalCluster().setBootstrapMasterNodeIndex(0);
13157
logger.info("--> start data node / non master node");
13258
internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
133-
try {
134-
assertThat(
135-
clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT)
136-
.setMasterNodeTimeout(TimeValue.timeValueMillis(100))
137-
.get()
138-
.getState()
139-
.nodes()
140-
.getMasterNodeId(),
141-
nullValue()
142-
);
143-
fail("should not be able to find master");
144-
} catch (MasterNotDiscoveredException e) {
145-
// all is well, no master elected
146-
}
59+
awaitMasterNotFound();
60+
14761
logger.info("--> start master node (1)");
14862
final String masterNodeName = internalCluster().startMasterOnlyNode();
149-
assertThat(
150-
internalCluster().nonMasterClient()
151-
.admin()
152-
.cluster()
153-
.prepareState(TEST_REQUEST_TIMEOUT)
154-
.get()
155-
.getState()
156-
.nodes()
157-
.getMasterNode()
158-
.getName(),
159-
equalTo(masterNodeName)
160-
);
161-
assertThat(
162-
internalCluster().masterClient()
163-
.admin()
164-
.cluster()
165-
.prepareState(TEST_REQUEST_TIMEOUT)
166-
.get()
167-
.getState()
168-
.nodes()
169-
.getMasterNode()
170-
.getName(),
171-
equalTo(masterNodeName)
172-
);
63+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
64+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
17365

17466
logger.info("--> start master node (2)");
17567
final String nextMasterEligableNodeName = internalCluster().startMasterOnlyNode();
176-
assertThat(
177-
internalCluster().nonMasterClient()
178-
.admin()
179-
.cluster()
180-
.prepareState(TEST_REQUEST_TIMEOUT)
181-
.get()
182-
.getState()
183-
.nodes()
184-
.getMasterNode()
185-
.getName(),
186-
equalTo(masterNodeName)
187-
);
188-
assertThat(
189-
internalCluster().masterClient()
190-
.admin()
191-
.cluster()
192-
.prepareState(TEST_REQUEST_TIMEOUT)
193-
.get()
194-
.getState()
195-
.nodes()
196-
.getMasterNode()
197-
.getName(),
198-
equalTo(masterNodeName)
199-
);
68+
awaitMasterNode(internalCluster().getNonMasterNodeName(), masterNodeName);
69+
awaitMasterNode(internalCluster().getMasterName(), masterNodeName);
20070

20171
logger.info("--> closing master node (1)");
20272
client().execute(
20373
TransportAddVotingConfigExclusionsAction.TYPE,
20474
new AddVotingConfigExclusionsRequest(TEST_REQUEST_TIMEOUT, masterNodeName)
20575
).get();
20676
// removing the master from the voting configuration immediately triggers the master to step down
207-
assertBusy(() -> {
208-
assertThat(
209-
internalCluster().nonMasterClient()
210-
.admin()
211-
.cluster()
212-
.prepareState(TEST_REQUEST_TIMEOUT)
213-
.get()
214-
.getState()
215-
.nodes()
216-
.getMasterNode()
217-
.getName(),
218-
equalTo(nextMasterEligableNodeName)
219-
);
220-
assertThat(
221-
internalCluster().masterClient()
222-
.admin()
223-
.cluster()
224-
.prepareState(TEST_REQUEST_TIMEOUT)
225-
.get()
226-
.getState()
227-
.nodes()
228-
.getMasterNode()
229-
.getName(),
230-
equalTo(nextMasterEligableNodeName)
231-
);
232-
});
77+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName);
78+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName);
79+
23380
internalCluster().stopNode(masterNodeName);
234-
assertThat(
235-
internalCluster().nonMasterClient()
236-
.admin()
237-
.cluster()
238-
.prepareState(TEST_REQUEST_TIMEOUT)
239-
.get()
240-
.getState()
241-
.nodes()
242-
.getMasterNode()
243-
.getName(),
244-
equalTo(nextMasterEligableNodeName)
245-
);
246-
assertThat(
247-
internalCluster().masterClient()
248-
.admin()
249-
.cluster()
250-
.prepareState(TEST_REQUEST_TIMEOUT)
251-
.get()
252-
.getState()
253-
.nodes()
254-
.getMasterNode()
255-
.getName(),
256-
equalTo(nextMasterEligableNodeName)
257-
);
81+
awaitMasterNode(internalCluster().getNonMasterNodeName(), nextMasterEligableNodeName);
82+
awaitMasterNode(internalCluster().getMasterName(), nextMasterEligableNodeName);
25883
}
25984

26085
public void testAliasFilterValidation() {

test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
import org.elasticsearch.client.ResponseException;
2020
import org.elasticsearch.client.RestClient;
2121
import org.elasticsearch.client.WarningsHandler;
22+
import org.elasticsearch.cluster.metadata.IndexMetadata;
23+
import org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator;
2224
import org.elasticsearch.common.CheckedSupplier;
2325
import org.elasticsearch.common.Strings;
2426
import org.elasticsearch.common.breaker.CircuitBreakingException;
@@ -973,6 +975,10 @@ private void initIndex(String name, String bulk) throws IOException {
973975
if (indexExists(name) == false) {
974976
// not strictly required, but this can help isolate failure from bulk indexing.
975977
createIndex(name);
978+
var settings = (Map<?, ?>) ((Map<?, ?>) getIndexSettings(name).get(name)).get("settings");
979+
if (settings.containsKey(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING.getKey()) == false) {
980+
updateIndexSettings(name, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0));
981+
}
976982
}
977983
if (hasText(bulk)) {
978984
bulk(name, bulk);

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,7 @@
198198
import java.util.List;
199199
import java.util.Locale;
200200
import java.util.Map;
201+
import java.util.Optional;
201202
import java.util.Random;
202203
import java.util.Set;
203204
import java.util.concurrent.Callable;
@@ -940,6 +941,39 @@ public void waitNoPendingTasksOnAll() throws Exception {
940941
assertNoTimeout(clusterAdmin().prepareHealth(TEST_REQUEST_TIMEOUT).setWaitForEvents(Priority.LANGUID).get());
941942
}
942943

944+
/**
945+
* Waits for the node {@code viaNode} to see {@code masterNodeName} as the master node in the cluster state.
946+
* Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already.
947+
*
948+
* @param viaNode the node to check the cluster state on
949+
* @param masterNodeName the master node name that we wait for
950+
*/
951+
public void awaitMasterNode(String viaNode, String masterNodeName) {
952+
var listener = ClusterServiceUtils.addTemporaryStateListener(
953+
internalCluster().clusterService(viaNode),
954+
state -> Optional.ofNullable(state.nodes().getMasterNode()).map(m -> m.getName().equals(masterNodeName)).orElse(false),
955+
TEST_REQUEST_TIMEOUT
956+
);
957+
safeAwait(listener, TEST_REQUEST_TIMEOUT);
958+
}
959+
960+
/**
961+
* Waits for a random node in the cluster to not see a master node in the cluster state.
962+
* Note that this does not guarantee that all other nodes in the cluster are on the same cluster state version already.
963+
*/
964+
public void awaitMasterNotFound() {
965+
var viaNode = internalCluster().getRandomNodeName();
966+
// We use a temporary state listener instead of `awaitClusterState` here because the `ClusterStateObserver` doesn't run the
967+
// predicate if the cluster state version didn't change. When a master node leaves the cluster (i.e. what this method is used for),
968+
// the cluster state version is not incremented.
969+
var listener = ClusterServiceUtils.addTemporaryStateListener(
970+
internalCluster().clusterService(viaNode),
971+
state -> state.nodes().getMasterNode() == null,
972+
TEST_REQUEST_TIMEOUT
973+
);
974+
safeAwait(listener, TEST_REQUEST_TIMEOUT);
975+
}
976+
943977
/** Ensures the result counts are as expected, and logs the results if different */
944978
public void assertResultsAndLogOnFailure(long expectedResults, SearchResponse searchResponse) {
945979
final TotalHits totalHits = searchResponse.getHits().getTotalHits();

test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2033,15 +2033,28 @@ public String getMasterName() {
20332033
* in the viaNode parameter. If viaNode isn't specified a random node will be picked to send the request to.
20342034
*/
20352035
public String getMasterName(@Nullable String viaNode) {
2036+
viaNode = viaNode != null ? viaNode : getRandomNodeName();
2037+
if (viaNode == null) {
2038+
throw new AssertionError("Unable to get master name, no node found");
2039+
}
20362040
try {
2037-
Client client = viaNode != null ? client(viaNode) : client();
2038-
return client.admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).get().getState().nodes().getMasterNode().getName();
2041+
ClusterServiceUtils.awaitClusterState(logger, state -> state.nodes().getMasterNode() != null, clusterService(viaNode));
2042+
final ClusterState state = client(viaNode).admin().cluster().prepareState(TEST_REQUEST_TIMEOUT).setLocal(true).get().getState();
2043+
return state.nodes().getMasterNode().getName();
20392044
} catch (Exception e) {
20402045
logger.warn("Can't fetch cluster state", e);
20412046
throw new RuntimeException("Can't get master node " + e.getMessage(), e);
20422047
}
20432048
}
20442049

2050+
public String getNonMasterNodeName() {
2051+
NodeAndClient randomNodeAndClient = getRandomNodeAndClient(new NodeNamePredicate(getMasterName()).negate());
2052+
if (randomNodeAndClient != null) {
2053+
return randomNodeAndClient.getName();
2054+
}
2055+
throw new AssertionError("No non-master node found");
2056+
}
2057+
20452058
/**
20462059
* @return the name of a random node in a cluster
20472060
*/

0 commit comments

Comments
 (0)