From 95c409be9eef7e2f9c5b9d659a81a7107e3ae4b1 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 14 Jul 2025 12:04:38 +1000 Subject: [PATCH 1/3] [Test] Wait on master node for shard started The shard-started state may not yet be visible on the master node if the test waits on a data node. In that case, the DiskThreshold monitor may use a stale cluster state when releasing read-only blocks. This PR fixes it by waiting on the master node, which was the behaviour before #129872. Resolves: #131146 --- .../allocation/DiskThresholdMonitorIT.java | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java index a4533a674fe70..1a37cd4e2fa00 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.elasticsearch.cluster.DiskUsageIntegTestCase; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; @@ -86,20 +87,12 @@ public void testFloodStageExceeded() throws Exception { // Verify that we can still move shards around even while blocked final String newDataNodeName = internalCluster().startDataOnlyNode(); final String newDataNodeId = clusterAdmin().prepareNodesInfo(newDataNodeName).get().getNodes().get(0).getNode().getId(); - assertBusy(() -> { - final ShardRouting primaryShard = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT) - .clear() -
.setRoutingTable(true) - .setNodes(true) - .setIndices(indexName) - .get() - .getState() - .routingTable() - .index(indexName) - .shard(0) - .primaryShard(); - assertThat(primaryShard.state(), equalTo(ShardRoutingState.STARTED)); - assertThat(primaryShard.currentNodeId(), equalTo(newDataNodeId)); + awaitClusterState(state -> { + final ShardRouting primaryShard = state.routingTable(ProjectId.DEFAULT).index(indexName).shard(0).primaryShard(); + if (primaryShard.state() != ShardRoutingState.STARTED) { + return false; + } + return newDataNodeId.equals(primaryShard.currentNodeId()); }); // Verify that the block is removed once the shard migration is complete From acd89698c3ad2fd60370f765edcb195c2561773c Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 14 Jul 2025 17:33:41 +1000 Subject: [PATCH 2/3] Update server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java Co-authored-by: David Turner --- .../cluster/routing/allocation/DiskThresholdMonitorIT.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java index 1a37cd4e2fa00..bb19c9d477a45 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitorIT.java @@ -89,10 +89,7 @@ public void testFloodStageExceeded() throws Exception { final String newDataNodeId = clusterAdmin().prepareNodesInfo(newDataNodeName).get().getNodes().get(0).getNode().getId(); awaitClusterState(state -> { final ShardRouting primaryShard = state.routingTable(ProjectId.DEFAULT).index(indexName).shard(0).primaryShard(); - if (primaryShard.state() != ShardRoutingState.STARTED) { - return false; - } - 
return newDataNodeId.equals(primaryShard.currentNodeId()); + return primaryShard.state() == ShardRoutingState.STARTED && newDataNodeId.equals(primaryShard.currentNodeId()); }); // Verify that the block is removed once the shard migration is complete From b713053f0f0eda01c11464b2d0252b665d86802d Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 15 Jul 2025 09:05:24 +1000 Subject: [PATCH 3/3] unmute --- muted-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 5487810e2fe1e..6b52f2456dc1e 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -535,9 +535,6 @@ tests: - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT method: test {p0=field_caps/40_time_series/Get simple time series field caps} issue: https://github.com/elastic/elasticsearch/issues/131225 -- class: org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitorIT - method: testFloodStageExceeded - issue: https://github.com/elastic/elasticsearch/issues/131146 - class: org.elasticsearch.packaging.test.DockerTests method: test090SecurityCliPackaging issue: https://github.com/elastic/elasticsearch/issues/131107