Skip to content

Commit 6858c32

Browse files
authored
[Test] Allow allocation in mixed cluster (#129680)
The RunningSnapshotIT upgrade test adds shutdown markers to all nodes and removes them once all nodes are upgraded. If an index gets created in a mixed cluster, for example by ILM or deprecation messages, the index cannot be allocated because all nodes are shutting down. Since the cluster ready check between node upgrades expects a yellow cluster, the unassigned index prevents the ready check from succeeding, and it eventually times out. This PR fixes it by removing the shutdown marker for the first upgraded node so that it can host new indices. Resolves: #129644 Resolves: #129645 Resolves: #129646
1 parent 5449d95 commit 6858c32

File tree

2 files changed

+23
-13
lines changed

2 files changed

+23
-13
lines changed

muted-tests.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -571,15 +571,6 @@ tests:
571571
- class: org.elasticsearch.server.cli.MachineDependentHeapTests
572572
method: testMlOnlyOptions
573573
issue: https://github.com/elastic/elasticsearch/issues/129236
574-
- class: org.elasticsearch.upgrades.RunningSnapshotIT
575-
method: testRunningSnapshotCompleteAfterUpgrade {upgradedNodes=1}
576-
issue: https://github.com/elastic/elasticsearch/issues/129644
577-
- class: org.elasticsearch.upgrades.RunningSnapshotIT
578-
method: testRunningSnapshotCompleteAfterUpgrade {upgradedNodes=2}
579-
issue: https://github.com/elastic/elasticsearch/issues/129645
580-
- class: org.elasticsearch.upgrades.RunningSnapshotIT
581-
method: testRunningSnapshotCompleteAfterUpgrade {upgradedNodes=3}
582-
issue: https://github.com/elastic/elasticsearch/issues/129646
583574
- class: org.elasticsearch.test.apmintegration.TracesApmIT
584575
method: testApmIntegration
585576
issue: https://github.com/elastic/elasticsearch/issues/129651

qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/RunningSnapshotIT.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,15 @@
1818

1919
import java.io.IOException;
2020
import java.util.Collection;
21+
import java.util.Map;
22+
import java.util.stream.Collectors;
2123

24+
import static org.elasticsearch.client.RestClient.IGNORE_RESPONSE_CODES_PARAM;
2225
import static org.elasticsearch.upgrades.SnapshotBasedRecoveryIT.indexDocs;
26+
import static org.hamcrest.Matchers.containsInAnyOrder;
2327
import static org.hamcrest.Matchers.empty;
2428
import static org.hamcrest.Matchers.equalTo;
29+
import static org.hamcrest.Matchers.hasSize;
2530
import static org.hamcrest.Matchers.not;
2631

2732
public class RunningSnapshotIT extends AbstractRollingUpgradeTestCase {
@@ -34,7 +39,11 @@ public void testRunningSnapshotCompleteAfterUpgrade() throws Exception {
3439
final String indexName = "index";
3540
final String repositoryName = "repo";
3641
final String snapshotName = "snapshot";
37-
final var nodeIds = getNodesInfo(client()).keySet();
42+
final Map<String, Map<?, ?>> nodesInfo = getNodesInfo(client());
43+
final var nodeIdToNodeNames = nodesInfo.entrySet()
44+
.stream()
45+
.collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, entry -> entry.getValue().get("name").toString()));
46+
assertThat(nodeIdToNodeNames.values(), containsInAnyOrder("test-cluster-0", "test-cluster-1", "test-cluster-2"));
3847

3948
if (isOldCluster()) {
4049
registerRepository(repositoryName, "fs", randomBoolean(), Settings.builder().put("location", "backup").build());
@@ -46,16 +55,25 @@ public void testRunningSnapshotCompleteAfterUpgrade() throws Exception {
4655
}
4756
flush(indexName, true);
4857
// Signal shutdown to prevent snapshot from being completed
49-
putShutdownMetadata(nodeIds);
58+
putShutdownMetadata(nodeIdToNodeNames.keySet());
5059
createSnapshot(repositoryName, snapshotName, false);
5160
assertRunningSnapshot(repositoryName, snapshotName);
5261
} else {
5362
if (isUpgradedCluster()) {
54-
deleteShutdownMetadata(nodeIds);
55-
assertNoShutdownMetadata(nodeIds);
63+
deleteShutdownMetadata(nodeIdToNodeNames.keySet());
64+
assertNoShutdownMetadata(nodeIdToNodeNames.keySet());
5665
ensureGreen(indexName);
5766
assertBusy(() -> assertCompletedSnapshot(repositoryName, snapshotName));
5867
} else {
68+
if (isFirstMixedCluster()) {
69+
final var upgradedNodeIds = nodeIdToNodeNames.entrySet()
70+
.stream()
71+
.filter(entry -> "test-cluster-0".equals(entry.getValue()))
72+
.map(Map.Entry::getKey)
73+
.collect(Collectors.toUnmodifiableSet());
74+
assertThat(upgradedNodeIds, hasSize(1));
75+
deleteShutdownMetadata(upgradedNodeIds);
76+
}
5977
assertRunningSnapshot(repositoryName, snapshotName);
6078
}
6179
}
@@ -76,6 +94,7 @@ private void putShutdownMetadata(Collection<String> nodeIds) throws IOException
7694
private void deleteShutdownMetadata(Collection<String> nodeIds) throws IOException {
7795
for (String nodeId : nodeIds) {
7896
final Request request = new Request("DELETE", "/_nodes/" + nodeId + "/shutdown");
97+
request.addParameter(IGNORE_RESPONSE_CODES_PARAM, "404");
7998
client().performRequest(request);
8099
}
81100
}

0 commit comments

Comments
 (0)