Skip to content

Commit a4a508c

Browse files
committed
[test] Fix RetrySearchIntegTests
Don't restart multiple nodes in a cluster simultaneously. Doing so causes data races when multiple primaries try to mark the `[[.snapshot-blob-cache][0]]` shard as stale:

```
org.elasticsearch.cluster.action.shard.ShardStateAction$NoLongerPrimaryShardException: primary term [2] did not match current primary term [4]
    at org.elasticsearch.cluster.action.shard.ShardStateAction$ShardFailedClusterStateTaskExecutor.execute(ShardStateAction.java:355) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService.innerExecuteTasks(MasterService.java:1075) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:1038) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService.executeAndPublishBatch(MasterService.java:245) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.lambda$run$2(MasterService.java:1691) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService$BatchingTaskQueue$Processor.run(MasterService.java:1688) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService$5.lambda$doRun$0(MasterService.java:1283) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.action.ActionListener.run(ActionListener.java:452) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.cluster.service.MasterService$5.doRun(MasterService.java:1262) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:1044) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:27) ~[elasticsearch-8.18.0-SNAPSHOT.jar:8.18.0-SNAPSHOT]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?]
    at java.lang.Thread.run(Thread.java:1575) ~[?:?]
```

Resolves #118374
Resolves #120442
1 parent 19fe0a4 commit a4a508c

File tree

2 files changed

+4
-10
lines changed

2 files changed

+4
-10
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,6 @@ tests:
125125
- class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT
126126
method: test {p0=data_stream/120_data_streams_stats/Multiple data stream}
127127
issue: https://github.com/elastic/elasticsearch/issues/118217
128-
- class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests
129-
method: testSearcherId
130-
issue: https://github.com/elastic/elasticsearch/issues/118374
131128
- class: org.elasticsearch.xpack.ccr.rest.ShardChangesRestIT
132129
method: testShardChangesNoOperation
133130
issue: https://github.com/elastic/elasticsearch/issues/118800

x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/RetrySearchIntegTests.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
3535
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
3636
import static org.hamcrest.Matchers.equalTo;
37+
import static org.hamcrest.Matchers.in;
3738

3839
public class RetrySearchIntegTests extends BaseSearchableSnapshotsIntegTestCase {
3940

@@ -87,10 +88,8 @@ public void testSearcherId() throws Exception {
8788
}
8889
}
8990

90-
for (String allocatedNode : allocatedNodes) {
91-
if (randomBoolean()) {
92-
internalCluster().restartNode(allocatedNode);
93-
}
91+
if (randomBoolean()) {
92+
internalCluster().restartNode(randomFrom(allocatedNodes));
9493
}
9594
ensureGreen(indexName);
9695
allocatedNodes = internalCluster().nodesInclude(indexName);
@@ -149,9 +148,7 @@ public void testRetryPointInTime() throws Exception {
149148
assertHitCount(resp, docCount);
150149
});
151150
final Set<String> allocatedNodes = internalCluster().nodesInclude(indexName);
152-
for (String allocatedNode : allocatedNodes) {
153-
internalCluster().restartNode(allocatedNode);
154-
}
151+
internalCluster().restartNode(randomFrom(allocatedNodes));
155152
ensureGreen(indexName);
156153
assertNoFailuresAndResponse(
157154
prepareSearch().setQuery(new RangeQueryBuilder("created_date").gte("2011-01-01").lte("2011-12-12"))

0 commit comments

Comments
 (0)