Commit 04cbfc0

[Test] Put shutdown marker on the last upgraded node only (#132157) (#132233)
This is the same failure as observed in #129644, for which the original fix #129680 did not really work because of the ordering of checks: the shutdown marker is removed only after the cluster passes the ready check so that new shards can be allocated, but the cluster cannot pass the ready check before the shards are allocated. Hence the circular dependency. In hindsight, there is no need to put a shutdown record on every node. It is only needed on the node that is upgraded last, to prevent the snapshot from completing during the upgrade process. This PR does that, which ensures there are always two nodes available to host new shards.

Resolves: #132135
Resolves: #132136
Resolves: #132137

(cherry picked from commit f39ccb5)

# Conflicts:
#	muted-tests.yml
1 parent efa8cf1 commit 04cbfc0
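
In short, the fix resolves the id of the node that is upgraded last ("test-cluster-2") and puts a "remove"-type shutdown marker on that node only. A minimal sketch of the two steps, mirroring the new test code in the diff below (it assumes nodeIdToNodeNames maps node id to node name and client() is the test's REST client, as they are in RunningSnapshotIT):

    // Resolve the id of the node that will be upgraded last.
    final var lastUpgradeNodeId = nodeIdToNodeNames.entrySet()
        .stream()
        .filter(entry -> "test-cluster-2".equals(entry.getValue()))
        .map(Map.Entry::getKey)
        .findFirst()
        .orElseThrow(() -> new AssertionError("node id not found in " + nodeIdToNodeNames));

    // A shutdown marker on this single node keeps the running snapshot from completing
    // during the upgrade while leaving the other two nodes free to host new shards.
    final Request putShutdownRequest = new Request("PUT", "/_nodes/" + lastUpgradeNodeId + "/shutdown");
    putShutdownRequest.setJsonEntity("""
        {
            "type": "remove",
            "reason": "test"
        }""");
    client().performRequest(putShutdownRequest);

Once the cluster is fully upgraded, the test deletes the marker with DELETE /_nodes/<node id>/shutdown, waits for the index to go green, and then asserts that the snapshot completes.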

File tree

1 file changed: +25 -34 lines changed

qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/RunningSnapshotIT.java

Lines changed: 25 additions & 34 deletions
@@ -12,12 +12,10 @@
 import com.carrotsearch.randomizedtesting.annotations.Name;
 
 import org.elasticsearch.client.Request;
-import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.rest.ObjectPath;
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.Map;
 import java.util.stream.Collectors;
 
@@ -26,7 +24,6 @@
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.not;
 
 public class RunningSnapshotIT extends AbstractRollingUpgradeTestCase {
@@ -45,6 +42,13 @@ public void testRunningSnapshotCompleteAfterUpgrade() throws Exception {
             .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, entry -> entry.getValue().get("name").toString()));
         assertThat(nodeIdToNodeNames.values(), containsInAnyOrder("test-cluster-0", "test-cluster-1", "test-cluster-2"));
 
+        final var lastUpgradeNodeId = nodeIdToNodeNames.entrySet()
+            .stream()
+            .filter(entry -> "test-cluster-2".equals(entry.getValue()))
+            .map(Map.Entry::getKey)
+            .findFirst()
+            .orElseThrow(() -> new AssertionError("node id not found in " + nodeIdToNodeNames));
+
         if (isOldCluster()) {
             registerRepository(repositoryName, "fs", randomBoolean(), Settings.builder().put("location", "backup").build());
             // create an index to have one shard per node
@@ -54,54 +58,41 @@ public void testRunningSnapshotCompleteAfterUpgrade() throws Exception {
                 indexDocs(indexName, between(10, 50));
             }
             flush(indexName, true);
-            // Signal shutdown to prevent snapshot from being completed
-            putShutdownMetadata(nodeIdToNodeNames.keySet());
+            // Signal shutdown for the last node to upgrade to prevent snapshot from being completed during the upgrade process
+            putShutdownMetadata(lastUpgradeNodeId);
             createSnapshot(repositoryName, snapshotName, false);
             assertRunningSnapshot(repositoryName, snapshotName);
         } else {
             if (isUpgradedCluster()) {
-                deleteShutdownMetadata(nodeIdToNodeNames.keySet());
-                assertNoShutdownMetadata(nodeIdToNodeNames.keySet());
+                deleteShutdownMetadata(lastUpgradeNodeId);
+                assertNoShutdownMetadata(lastUpgradeNodeId);
                 ensureGreen(indexName);
                 assertBusy(() -> assertCompletedSnapshot(repositoryName, snapshotName));
             } else {
-                if (isFirstMixedCluster()) {
-                    final var upgradedNodeIds = nodeIdToNodeNames.entrySet()
-                        .stream()
-                        .filter(entry -> "test-cluster-0".equals(entry.getValue()))
-                        .map(Map.Entry::getKey)
-                        .collect(Collectors.toUnmodifiableSet());
-                    assertThat(upgradedNodeIds, hasSize(1));
-                    deleteShutdownMetadata(upgradedNodeIds);
-                }
                 assertRunningSnapshot(repositoryName, snapshotName);
             }
         }
     }
 
-    private void putShutdownMetadata(Collection<String> nodeIds) throws IOException {
-        for (String nodeId : nodeIds) {
-            final Request putShutdownRequest = new Request("PUT", "/_nodes/" + nodeId + "/shutdown");
-            putShutdownRequest.setJsonEntity("""
-                {
-                    "type": "remove",
-                    "reason": "test"
-                }""");
-            client().performRequest(putShutdownRequest);
-        }
+    private void putShutdownMetadata(String nodeId) throws IOException {
+        final Request putShutdownRequest = new Request("PUT", "/_nodes/" + nodeId + "/shutdown");
+        putShutdownRequest.setJsonEntity("""
+            {
+                "type": "remove",
+                "reason": "test"
+            }""");
+        client().performRequest(putShutdownRequest);
     }
 
-    private void deleteShutdownMetadata(Collection<String> nodeIds) throws IOException {
-        for (String nodeId : nodeIds) {
-            final Request request = new Request("DELETE", "/_nodes/" + nodeId + "/shutdown");
-            request.addParameter(IGNORE_RESPONSE_CODES_PARAM, "404");
-            client().performRequest(request);
-        }
+    private void deleteShutdownMetadata(String nodeId) throws IOException {
+        final Request request = new Request("DELETE", "/_nodes/" + nodeId + "/shutdown");
+        request.addParameter(IGNORE_RESPONSE_CODES_PARAM, "404");
+        client().performRequest(request);
     }
 
-    private void assertNoShutdownMetadata(Collection<String> nodeIds) throws IOException {
+    private void assertNoShutdownMetadata(String nodeId) throws IOException {
         final ObjectPath responsePath = assertOKAndCreateObjectPath(
-            client().performRequest(new Request("GET", "/_nodes/" + Strings.collectionToCommaDelimitedString(nodeIds) + "/shutdown"))
+            client().performRequest(new Request("GET", "/_nodes/" + nodeId + "/shutdown"))
         );
         assertThat(responsePath.evaluate("nodes"), empty());
     }
