Skip to content

Commit 0be75e1

Browse files
authored
Fix testSearchAndRelocateConcurrently (#116806)
This aims to test that we can search through replica shard relocations. However, the way the test was written, it was sometimes also starting another data node. The concurrent search requests would sometimes hit this new node before its cluster state was RECOVERED. The search action throws an exception when the cluster state is not recovered, as it needs to be able to read the cluster state. This fixes the test to grab a copy of the bootstrapped node names and use them when calling the _search API before the cluster (potentially) resizes.
1 parent 97f587e commit 0be75e1

File tree

2 files changed

+31
-29
lines changed

2 files changed

+31
-29
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,6 @@ tests:
171171
- class: org.elasticsearch.search.basic.SearchWithRandomDisconnectsIT
172172
method: testSearchWithRandomDisconnects
173173
issue: https://github.com/elastic/elasticsearch/issues/116175
174-
- class: org.elasticsearch.search.basic.SearchWhileRelocatingIT
175-
method: testSearchAndRelocateConcurrentlyRandomReplicas
176-
issue: https://github.com/elastic/elasticsearch/issues/116145
177174
- class: org.elasticsearch.xpack.deprecation.DeprecationHttpIT
178175
method: testDeprecatedSettingsReturnWarnings
179176
issue: https://github.com/elastic/elasticsearch/issues/108628

server/src/internalClusterTest/java/org/elasticsearch/search/basic/SearchWhileRelocatingIT.java

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ private void testSearchAndRelocateConcurrently(final int numberOfReplicas) throw
6464
}
6565
indexRandom(true, indexBuilders.toArray(new IndexRequestBuilder[indexBuilders.size()]));
6666
assertHitCount(prepareSearch(), (numDocs));
67+
// hold a copy of the node names before a new node is potentially added later
68+
String[] nodeNamesBeforeClusterResize = internalCluster().getNodeNames();
6769
final int numIters = scaledRandomIntBetween(5, 20);
6870
for (int i = 0; i < numIters; i++) {
6971
final AtomicBoolean stop = new AtomicBoolean(false);
@@ -76,34 +78,37 @@ private void testSearchAndRelocateConcurrently(final int numberOfReplicas) throw
7678
public void run() {
7779
try {
7880
while (stop.get() == false) {
79-
assertResponse(prepareSearch().setSize(numDocs), response -> {
80-
if (response.getHits().getTotalHits().value() != numDocs) {
81-
// if we did not search all shards but had no serious failures that is potentially fine
82-
// if only the hit-count is wrong. this can happen if the cluster-state is behind when the
83-
// request comes in. It's a small window but a known limitation.
84-
if (response.getTotalShards() != response.getSuccessfulShards()
85-
&& Stream.of(response.getShardFailures())
86-
.allMatch(ssf -> ssf.getCause() instanceof NoShardAvailableActionException)) {
87-
nonCriticalExceptions.add(
88-
"Count is "
89-
+ response.getHits().getTotalHits().value()
90-
+ " but "
91-
+ numDocs
92-
+ " was expected. "
93-
+ formatShardStatus(response)
94-
);
95-
} else {
96-
assertHitCount(response, numDocs);
81+
assertResponse(
82+
client(randomFrom(nodeNamesBeforeClusterResize)).prepareSearch().setSize(numDocs),
83+
response -> {
84+
if (response.getHits().getTotalHits().value() != numDocs) {
85+
// if we did not search all shards but had no serious failures that is potentially fine
86+
// if only the hit-count is wrong. this can happen if the cluster-state is behind when the
87+
// request comes in. It's a small window but a known limitation.
88+
if (response.getTotalShards() != response.getSuccessfulShards()
89+
&& Stream.of(response.getShardFailures())
90+
.allMatch(ssf -> ssf.getCause() instanceof NoShardAvailableActionException)) {
91+
nonCriticalExceptions.add(
92+
"Count is "
93+
+ response.getHits().getTotalHits().value()
94+
+ " but "
95+
+ numDocs
96+
+ " was expected. "
97+
+ formatShardStatus(response)
98+
);
99+
} else {
100+
assertHitCount(response, numDocs);
101+
}
97102
}
98-
}
99103

100-
final SearchHits sh = response.getHits();
101-
assertThat(
102-
"Expected hits to be the same size the actual hits array",
103-
sh.getTotalHits().value(),
104-
equalTo((long) (sh.getHits().length))
105-
);
106-
});
104+
final SearchHits sh = response.getHits();
105+
assertThat(
106+
"Expected hits to be the same size the actual hits array",
107+
sh.getTotalHits().value(),
108+
equalTo((long) (sh.getHits().length))
109+
);
110+
}
111+
);
107112
// this is the more critical but that we hit the actual hit array has a different size than the
108113
// actual number of hits.
109114
}

0 commit comments

Comments
 (0)