Commit 5ffa07c

[Test] Add more logs for WriteLoadConstraintDeciderIT (elastic#137171)
Add cluster state dumps on setup failure. Relates: elastic#137162
1 parent a9a9f0c commit 5ffa07c
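
For context, the change applies one pattern throughout the test: wrap the awaited allocation check in a try/catch on AssertionError, dump the cluster state, then rethrow. A minimal sketch of that pattern, reusing the helper names from the diff below (the wrapper method and the listener type here are illustrative, not part of the commit):

    // Sketch only: assumes the Elasticsearch test-framework helpers used in the
    // diff (safeAwait from ESTestCase, logClusterState from ESIntegTestCase) and
    // a listener that completes once the expected allocation state is reached.
    private void awaitExpectedAllocation(SubscribableListener<Void> clusterStateListener) {
        try {
            safeAwait(clusterStateListener);   // wait for the expected cluster state
        } catch (AssertionError error) {
            dumpClusterState();                // log the cluster state before failing
            throw error;                       // rethrow so the test still fails
        }
    }

    private void dumpClusterState() {
        logger.info("--> Failed to reach expected allocation state. Dumping cluster state");
        logClusterState();                     // logs the full cluster state for debugging
    }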

File tree: 2 files changed, +33 −75 lines

muted-tests.yml

Lines changed: 0 additions & 3 deletions
@@ -501,9 +501,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeMetricsIT
   method: test
   issue: https://github.com/elastic/elasticsearch/issues/137071
-- class: org.elasticsearch.cluster.routing.allocation.decider.WriteLoadConstraintDeciderIT
-  method: testCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred
-  issue: https://github.com/elastic/elasticsearch/issues/137162
 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsIT
   method: testDatafeedTimingStats_DatafeedRecreated
   issue: https://github.com/elastic/elasticsearch/issues/137207

server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java

Lines changed: 33 additions & 72 deletions
@@ -64,8 +64,6 @@
 import java.util.stream.StreamSupport;
 
 import static java.util.stream.IntStream.range;
-import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
-import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.everyItem;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -146,7 +144,7 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
          * second node, since the third is reporting as hot-spotted and should not accept any shards.
          */
 
-        logger.info("---> Refreshing the cluster info to pull in the dummy thread pool stats with a hot-spotting node");
+        logger.info("--> Refreshing the cluster info to pull in the dummy thread pool stats with a hot-spotting node");
         refreshClusterInfo();
 
         var temporaryClusterStateListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> {
@@ -165,24 +163,14 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
 
         try {
             logger.info(
-                "---> Update the filter to exclude " + harness.firstDataNodeName + " so shards will be reassigned away to the other nodes"
+                "--> Update the filter to exclude " + harness.firstDataNodeName + " so shards will be reassigned away to the other nodes"
             );
             // Updating the cluster settings will trigger a reroute request, no need to explicitly request one in the test.
             updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", harness.firstDataNodeName));
 
             safeAwait(temporaryClusterStateListener);
         } catch (AssertionError error) {
-            ClusterState state = internalCluster().client()
-                .admin()
-                .cluster()
-                .prepareState(TEST_REQUEST_TIMEOUT)
-                .clear()
-                .setMetadata(true)
-                .setNodes(true)
-                .setRoutingTable(true)
-                .get()
-                .getState();
-            logger.info("---> Failed to reach expected allocation state. Dumping assignments: " + state.getRoutingNodes());
+            dumpClusterState();
             throw error;
         }
     }
@@ -251,7 +239,7 @@ public void testShardsAreAssignedToNotPreferredWhenAlternativeIsNo() {
          * the second and third node reporting hot-spotting: a canRemain::NO response should override a canAllocate::NOT_PREFERRED answer.
          */
 
-        logger.info("---> Refreshing the cluster info to pull in the dummy thread pool stats with a hot-spotting node");
+        logger.info("--> Refreshing the cluster info to pull in the dummy thread pool stats with a hot-spotting node");
         refreshClusterInfo();
 
         var temporaryClusterStateListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> {
@@ -265,25 +253,13 @@ public void testShardsAreAssignedToNotPreferredWhenAlternativeIsNo() {
         });
 
         try {
-            logger.info(
-                "---> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only"
-            );
+            logger.info("--> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only");
             // Updating the cluster settings will trigger a reroute request, no need to explicitly request one in the test.
-            updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", ""));
+            updateClusterSettings(Settings.builder().putNull("cluster.routing.allocation.exclude._name"));
 
             safeAwait(temporaryClusterStateListener);
         } catch (AssertionError error) {
-            ClusterState state = internalCluster().client()
-                .admin()
-                .cluster()
-                .prepareState(TEST_REQUEST_TIMEOUT)
-                .clear()
-                .setMetadata(true)
-                .setNodes(true)
-                .setRoutingTable(true)
-                .get()
-                .getState();
-            logger.info("---> Failed to reach expected allocation state. Dumping assignments: " + state.getRoutingNodes());
+            dumpClusterState();
             throw error;
         }
     }
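
A note on the settings change above: putNull removes the exclude key from the cluster settings so the default (no exclusions) applies, whereas the old put(..., "") persisted an explicit empty value. A brief illustrative sketch using the same test helper:

    // Before: stores an explicit empty string for the allocation filter.
    updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", ""));

    // After: removes the setting entirely, so it resets to its default.
    updateClusterSettings(Settings.builder().putNull("cluster.routing.allocation.exclude._name"));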
@@ -351,17 +327,15 @@ public void testCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred() {
 
         // Wait for the DesiredBalance to be recomputed as a result of the ClusterInfo refresh. Ensures no async computation.
         MockLog.awaitLogger(() -> {
-            logger.info("---> Refreshing the cluster info to pull in the dummy thread pool stats with hot-spot stats");
+            logger.info("--> Refreshing the cluster info to pull in the dummy thread pool stats with hot-spot stats");
             refreshClusterInfo();
         }, DesiredBalanceShardsAllocator.class, createBalancerConvergedSeenEvent());
 
         // Wait for the DesiredBalance to be recomputed as a result of the settings change.
         MockLog.awaitLogger(() -> {
-            logger.info(
-                "---> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only"
-            );
+            logger.info("--> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only");
             // Updating the cluster settings will trigger a reroute request.
-            updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", ""));
+            updateClusterSettings(Settings.builder().putNull("cluster.routing.allocation.exclude._name"));
         }, DesiredBalanceShardsAllocator.class, createBalancerConvergedSeenEvent());
 
         try {
@@ -371,23 +345,14 @@ public void testCanRemainNotPreferredIsIgnoredWhenAllOtherNodesReturnNotPreferred() {
                 client().execute(TransportGetDesiredBalanceAction.TYPE, new DesiredBalanceRequest(TEST_REQUEST_TIMEOUT))
             );
             Map<Integer, DesiredBalanceResponse.DesiredShards> shardsMap = desiredBalanceResponse.getRoutingTable().get(harness.indexName);
-            logger.info("---> Checking desired shard assignments are still on the first data node. Desired assignments: " + shardsMap);
+            logger.info("--> Checking desired shard assignments are still on the first data node. Desired assignments: " + shardsMap);
             for (var desiredShard : shardsMap.values()) {
                 for (var desiredNodeId : desiredShard.desired().nodeIds()) {
                     assertEquals("Found a shard assigned to an unexpected node: " + shardsMap, desiredNodeId, harness.firstDataNodeId);
                 }
             }
         } catch (AssertionError error) {
-            ClusterState state = client().admin()
-                .cluster()
-                .prepareState(TEST_REQUEST_TIMEOUT)
-                .clear()
-                .setMetadata(true)
-                .setNodes(true)
-                .setRoutingTable(true)
-                .get()
-                .getState();
-            logger.info("---> Failed to reach expected allocation state. Dumping assignments: " + state.getRoutingNodes());
+            dumpClusterState();
             throw error;
         }
     }
@@ -452,17 +417,15 @@ public void testCanRemainRelocatesOneShardWhenAHotSpotOccurs() {
 
         // Wait for the DesiredBalance to be recomputed as a result of the ClusterInfo refresh. This way nothing async is running.
        MockLog.awaitLogger(() -> {
-            logger.info("---> Refreshing the cluster info to pull in the dummy thread pool stats with hot-spot stats");
+            logger.info("--> Refreshing the cluster info to pull in the dummy thread pool stats with hot-spot stats");
             refreshClusterInfo();
         }, DesiredBalanceShardsAllocator.class, createBalancerConvergedSeenEvent());
 
         // Wait for the DesiredBalance to be recomputed as a result of the settings change.
         MockLog.awaitLogger(() -> {
-            logger.info(
-                "---> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only"
-            );
+            logger.info("--> Update the filter to remove exclusions so that shards can be reassigned based on the write load decider only");
             // Updating the cluster settings will trigger a reroute request.
-            updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", ""));
+            updateClusterSettings(Settings.builder().putNull("cluster.routing.allocation.exclude._name"));
         }, DesiredBalanceShardsAllocator.class, createBalancerConvergedSeenEvent());
 
         try {
@@ -471,7 +434,7 @@ public void testCanRemainRelocatesOneShardWhenAHotSpotOccurs() {
                 client().execute(TransportGetDesiredBalanceAction.TYPE, new DesiredBalanceRequest(TEST_REQUEST_TIMEOUT))
             );
             Map<Integer, DesiredBalanceResponse.DesiredShards> shardsMap = desiredBalanceResponse.getRoutingTable().get(harness.indexName);
-            logger.info("---> Checking desired shard assignments. Desired assignments: " + shardsMap);
+            logger.info("--> Checking desired shard assignments. Desired assignments: " + shardsMap);
             int countShardsStillAssignedToFirstNode = 0;
             for (var desiredShard : shardsMap.values()) {
                 for (var desiredNodeId : desiredShard.desired().nodeIds()) {
@@ -487,16 +450,7 @@ public void testCanRemainRelocatesOneShardWhenAHotSpotOccurs() {
             );
             assertThatTheBestShardWasMoved(harness, originalClusterState, desiredBalanceResponse);
         } catch (AssertionError error) {
-            ClusterState state = client().admin()
-                .cluster()
-                .prepareState(TEST_REQUEST_TIMEOUT)
-                .clear()
-                .setMetadata(true)
-                .setNodes(true)
-                .setRoutingTable(true)
-                .get()
-                .getState();
-            logger.info("---> Failed to reach expected allocation state. Dumping assignments: " + state.getRoutingNodes());
+            dumpClusterState();
             throw error;
         }
     }
@@ -789,7 +743,7 @@ private TestHarness setUpThreeTestNodesAndAllIndexShardsOnFirstNode() {
         final DiscoveryNode thirdDiscoveryNode = getDiscoveryNode(thirdDataNodeName);
 
         logger.info(
-            "---> first node name "
+            "--> first node name "
                 + firstDataNodeName
                 + " and ID "
                 + firstDataNodeId
@@ -804,7 +758,7 @@ private TestHarness setUpThreeTestNodesAndAllIndexShardsOnFirstNode() {
         );
 
         logger.info(
-            "---> utilization threshold: "
+            "--> utilization threshold: "
                 + randomUtilizationThresholdPercent
                 + ", write threads: "
                 + randomNumberOfWritePoolThreads
@@ -817,7 +771,7 @@ private TestHarness setUpThreeTestNodesAndAllIndexShardsOnFirstNode() {
          * Then create an index with many shards, which will all be assigned to the first data node.
          */
 
-        logger.info("---> Limit shard assignment to node " + firstDataNodeName + " by excluding the other nodes");
+        logger.info("--> Limit shard assignment to node " + firstDataNodeName + " by excluding the other nodes");
         updateClusterSettings(
             Settings.builder().put("cluster.routing.allocation.exclude._name", secondDataNodeName + "," + thirdDataNodeName)
         );
@@ -849,14 +803,16 @@ private TestHarness setUpThreeTestNodesAndAllIndexShardsOnFirstNode() {
             );
         });
 
-        createIndex(
-            indexName,
-            Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomNumberOfShards).put(SETTING_NUMBER_OF_REPLICAS, 0).build()
-        );
+        createIndex(indexName, randomNumberOfShards, 0);
         ensureGreen(indexName);
 
-        logger.info("---> Waiting for all [" + randomNumberOfShards + "] shards to be assigned to node " + firstDataNodeName);
-        safeAwait(verifyAssignmentToFirstNodeListener);
+        logger.info("--> Waiting for all [" + randomNumberOfShards + "] shards to be assigned to node " + firstDataNodeName);
+        try {
+            safeAwait(verifyAssignmentToFirstNodeListener);
+        } catch (AssertionError error) {
+            dumpClusterState();
+            throw error;
+        }
 
         return new TestHarness(
             firstDataNodeName,
@@ -878,6 +834,11 @@ private TestHarness setUpThreeTestNodesAndAllIndexShardsOnFirstNode() {
         );
     }
 
+    private void dumpClusterState() {
+        logger.info("--> Failed to reach expected allocation state. Dumping cluster state");
+        logClusterState();
+    }
+
     /**
      * Carries set-up state from {@link #setUpThreeTestNodesAndAllIndexShardsOnFirstNode()} to the testing logic.
      */
