Skip to content

Commit f078c38

Browse files
authored
Fix SearchTimeoutIT (#120390) (#122191)
Two of the timeout tests have been muted for several months. The reason is that we tightened the assertions to cover for partial results being returned, but there were edge cases in which partial results were not actually returned. The timeout used in the test was time dependent, hence when the timeout precisely will be thrown is unpredictable, because we have timeout checks in different places in the codebase, when iterating through the leaves, before scoring any document, or while scoring documents. The edge case that caused failures is a typical timing issue where the initial check for timeout in CancellableBulkScorer already triggers the timeout, before any document has been collected. I made several adjustments to the test to make it more robust: - use index random to index documents, that speeds it up - share indexing across test methods, so that it happens once at the suite level - replace the custom query that triggers a timeout to not be a script query, but rather a lucene query that is not time dependent, and throws a time exceeded exception precisely where we expect it, so that we can test how the system reacts to that. That allows to test that partial results are always returned when a timeout happens while scoring documents, and that partial results are never returned when a timeout happens before we even started to score documents. Closes #98369 Closes #98053
1 parent 5634676 commit f078c38

File tree

1 file changed

+260
-62
lines changed

1 file changed

+260
-62
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java

Lines changed: 260 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -9,112 +9,310 @@
99

1010
package org.elasticsearch.search;
1111

12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.search.BulkScorer;
14+
import org.apache.lucene.search.ConstantScoreScorer;
15+
import org.apache.lucene.search.ConstantScoreWeight;
16+
import org.apache.lucene.search.DocIdSetIterator;
17+
import org.apache.lucene.search.IndexSearcher;
18+
import org.apache.lucene.search.LeafCollector;
19+
import org.apache.lucene.search.Query;
20+
import org.apache.lucene.search.QueryVisitor;
21+
import org.apache.lucene.search.Scorable;
22+
import org.apache.lucene.search.ScoreMode;
23+
import org.apache.lucene.search.Scorer;
24+
import org.apache.lucene.search.ScorerSupplier;
25+
import org.apache.lucene.search.Weight;
26+
import org.apache.lucene.util.Bits;
1227
import org.elasticsearch.ElasticsearchException;
13-
import org.elasticsearch.action.search.SearchResponse;
28+
import org.elasticsearch.TransportVersion;
29+
import org.elasticsearch.action.search.SearchRequestBuilder;
30+
import org.elasticsearch.common.io.stream.StreamInput;
31+
import org.elasticsearch.common.io.stream.StreamOutput;
1432
import org.elasticsearch.common.settings.Settings;
1533
import org.elasticsearch.core.TimeValue;
34+
import org.elasticsearch.index.query.AbstractQueryBuilder;
35+
import org.elasticsearch.index.query.QueryBuilder;
36+
import org.elasticsearch.index.query.SearchExecutionContext;
1637
import org.elasticsearch.plugins.Plugin;
17-
import org.elasticsearch.script.MockScriptPlugin;
18-
import org.elasticsearch.script.Script;
19-
import org.elasticsearch.script.ScriptType;
38+
import org.elasticsearch.plugins.SearchPlugin;
2039
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
2140
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
41+
import org.elasticsearch.search.internal.ContextIndexSearcher;
2242
import org.elasticsearch.test.ESIntegTestCase;
43+
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
44+
import org.elasticsearch.xcontent.XContentBuilder;
2345

46+
import java.io.IOException;
2447
import java.util.Collection;
2548
import java.util.Collections;
26-
import java.util.Map;
49+
import java.util.List;
2750
import java.util.concurrent.TimeUnit;
28-
import java.util.function.Function;
2951

30-
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
31-
import static org.elasticsearch.index.query.QueryBuilders.scriptQuery;
32-
import static org.elasticsearch.search.SearchTimeoutIT.ScriptedTimeoutPlugin.SCRIPT_NAME;
3352
import static org.hamcrest.Matchers.equalTo;
3453
import static org.hamcrest.Matchers.greaterThan;
3554

3655
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE)
56+
@ESIntegTestCase.SuiteScopeTestCase
3757
public class SearchTimeoutIT extends ESIntegTestCase {
3858

3959
@Override
4060
protected Collection<Class<? extends Plugin>> nodePlugins() {
41-
return Collections.singleton(ScriptedTimeoutPlugin.class);
61+
return Collections.singleton(BulkScorerTimeoutQueryPlugin.class);
4262
}
4363

4464
@Override
4565
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
4666
return Settings.builder().put(super.nodeSettings(nodeOrdinal, otherSettings)).build();
4767
}
4868

49-
private void indexDocs() {
50-
for (int i = 0; i < 32; i++) {
51-
prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value").get();
52-
}
53-
refresh("test");
69+
@Override
70+
protected void setupSuiteScopeCluster() throws Exception {
71+
super.setupSuiteScopeCluster();
72+
indexRandom(true, "test", randomIntBetween(20, 50));
5473
}
5574

56-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98369")
57-
public void testTopHitsTimeout() {
58-
indexDocs();
59-
SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
60-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
61-
.get();
62-
assertThat(searchResponse.isTimedOut(), equalTo(true));
63-
assertEquals(0, searchResponse.getShardFailures().length);
64-
assertEquals(0, searchResponse.getFailedShards());
65-
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
66-
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
67-
assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
68-
assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
75+
public void testTopHitsTimeoutBeforeCollecting() {
76+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
77+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.SECONDS))
78+
.setQuery(new BulkScorerTimeoutQuery(false));
79+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
80+
assertThat(searchResponse.isTimedOut(), equalTo(true));
81+
assertEquals(0, searchResponse.getShardFailures().length);
82+
assertEquals(0, searchResponse.getFailedShards());
83+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
84+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
85+
// timeout happened before we could collect any doc, total hits is 0 and no hits are returned
86+
assertEquals(0, searchResponse.getHits().getTotalHits().value());
87+
assertEquals(0, searchResponse.getHits().getHits().length);
88+
});
6989
}
7090

71-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98053")
72-
public void testAggsTimeout() {
73-
indexDocs();
74-
SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
75-
.setSize(0)
76-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
77-
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"))
78-
.get();
79-
assertThat(searchResponse.isTimedOut(), equalTo(true));
80-
assertEquals(0, searchResponse.getShardFailures().length);
81-
assertEquals(0, searchResponse.getFailedShards());
82-
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
83-
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
84-
assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
85-
assertEquals(searchResponse.getHits().getHits().length, 0);
86-
StringTerms terms = searchResponse.getAggregations().get("terms");
87-
assertEquals(1, terms.getBuckets().size());
88-
StringTerms.Bucket bucket = terms.getBuckets().get(0);
89-
assertEquals("value", bucket.getKeyAsString());
90-
assertThat(bucket.getDocCount(), greaterThan(0L));
91+
public void testTopHitsTimeoutWhileCollecting() {
92+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
93+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.SECONDS))
94+
.setQuery(new BulkScorerTimeoutQuery(true));
95+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
96+
assertThat(searchResponse.isTimedOut(), equalTo(true));
97+
assertEquals(0, searchResponse.getShardFailures().length);
98+
assertEquals(0, searchResponse.getFailedShards());
99+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
100+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
101+
assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
102+
assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
103+
});
91104
}
92105

93-
public void testPartialResultsIntolerantTimeout() throws Exception {
94-
prepareIndex("test").setId("1").setSource("field", "value").setRefreshPolicy(IMMEDIATE).get();
106+
public void testAggsTimeoutBeforeCollecting() {
107+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setSize(0)
108+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
109+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
110+
.setQuery(new BulkScorerTimeoutQuery(false))
111+
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"));
112+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
113+
assertThat(searchResponse.isTimedOut(), equalTo(true));
114+
assertEquals(0, searchResponse.getShardFailures().length);
115+
assertEquals(0, searchResponse.getFailedShards());
116+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
117+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
118+
assertEquals(0, searchResponse.getHits().getTotalHits().value());
119+
assertEquals(0, searchResponse.getHits().getHits().length);
120+
StringTerms terms = searchResponse.getAggregations().get("terms");
121+
// timeout happened before we could collect any doc, total hits is 0 and no buckets are returned
122+
assertEquals(0, terms.getBuckets().size());
123+
});
124+
}
95125

126+
public void testAggsTimeoutWhileCollecting() {
127+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setSize(0)
128+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
129+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
130+
.setQuery(new BulkScorerTimeoutQuery(true))
131+
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"));
132+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
133+
assertThat(searchResponse.isTimedOut(), equalTo(true));
134+
assertEquals(0, searchResponse.getShardFailures().length);
135+
assertEquals(0, searchResponse.getFailedShards());
136+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
137+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
138+
assertThat(searchResponse.getHits().getTotalHits().value(), greaterThan(0L));
139+
assertEquals(0, searchResponse.getHits().getHits().length);
140+
StringTerms terms = searchResponse.getAggregations().get("terms");
141+
assertEquals(1, terms.getBuckets().size());
142+
StringTerms.Bucket bucket = terms.getBuckets().get(0);
143+
assertEquals("value", bucket.getKeyAsString());
144+
assertThat(bucket.getDocCount(), greaterThan(0L));
145+
});
146+
}
147+
148+
public void testPartialResultsIntolerantTimeoutBeforeCollecting() {
96149
ElasticsearchException ex = expectThrows(
97150
ElasticsearchException.class,
98-
prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
99-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
151+
prepareSearch("test")
152+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
153+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
154+
.setQuery(new BulkScorerTimeoutQuery(false))
100155
.setAllowPartialSearchResults(false) // this line causes timeouts to report failures
101156
);
102157
assertTrue(ex.toString().contains("Time exceeded"));
158+
assertEquals(429, ex.status().getStatus());
103159
}
104160

105-
public static class ScriptedTimeoutPlugin extends MockScriptPlugin {
106-
static final String SCRIPT_NAME = "search_timeout";
161+
public void testPartialResultsIntolerantTimeoutWhileCollecting() {
162+
ElasticsearchException ex = expectThrows(
163+
ElasticsearchException.class,
164+
prepareSearch("test")
165+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
166+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
167+
.setQuery(new BulkScorerTimeoutQuery(true))
168+
.setAllowPartialSearchResults(false) // this line causes timeouts to report failures
169+
);
170+
assertTrue(ex.toString().contains("Time exceeded"));
171+
assertEquals(429, ex.status().getStatus());
172+
}
173+
174+
public static final class BulkScorerTimeoutQueryPlugin extends Plugin implements SearchPlugin {
175+
@Override
176+
public List<QuerySpec<?>> getQueries() {
177+
return Collections.singletonList(new QuerySpec<QueryBuilder>("timeout", BulkScorerTimeoutQuery::new, parser -> {
178+
throw new UnsupportedOperationException();
179+
}));
180+
}
181+
}
182+
183+
/**
184+
* Query builder that produces a Lucene Query which throws a
185+
* {@link org.elasticsearch.search.internal.ContextIndexSearcher.TimeExceededException} before or while scoring documents.
186+
* This helps make this test not time dependent, otherwise it would be unpredictable when exactly the timeout happens, which is
187+
* rather important if we want to test that we are able to return partial results on timeout.
188+
*/
189+
public static final class BulkScorerTimeoutQuery extends AbstractQueryBuilder<BulkScorerTimeoutQuery> {
190+
191+
private final boolean partialResults;
192+
193+
BulkScorerTimeoutQuery(boolean partialResults) {
194+
this.partialResults = partialResults;
195+
}
196+
197+
BulkScorerTimeoutQuery(StreamInput in) throws IOException {
198+
super(in);
199+
this.partialResults = in.readBoolean();
200+
}
201+
202+
@Override
203+
protected void doWriteTo(StreamOutput out) throws IOException {
204+
out.writeBoolean(partialResults);
205+
}
206+
207+
@Override
208+
protected void doXContent(XContentBuilder builder, Params params) {}
107209

108210
@Override
109-
public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
110-
return Collections.singletonMap(SCRIPT_NAME, params -> {
111-
try {
112-
Thread.sleep(500);
113-
} catch (InterruptedException e) {
114-
throw new RuntimeException(e);
211+
protected Query doToQuery(SearchExecutionContext context) {
212+
return new Query() {
213+
@Override
214+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
215+
return new ConstantScoreWeight(this, boost) {
216+
@Override
217+
public boolean isCacheable(LeafReaderContext ctx) {
218+
return false;
219+
}
220+
221+
@Override
222+
public ScorerSupplier scorerSupplier(LeafReaderContext context) {
223+
return new ScorerSupplier() {
224+
@Override
225+
public BulkScorer bulkScorer() {
226+
if (partialResults == false) {
227+
((ContextIndexSearcher) searcher).throwTimeExceededException();
228+
}
229+
final int maxDoc = context.reader().maxDoc();
230+
return new BulkScorer() {
231+
@Override
232+
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
233+
max = Math.min(max, maxDoc);
234+
collector.setScorer(new Scorable() {
235+
@Override
236+
public float score() {
237+
return 1f;
238+
}
239+
});
240+
for (int doc = min; doc < max; ++doc) {
241+
if (acceptDocs == null || acceptDocs.get(doc)) {
242+
collector.collect(doc);
243+
// collect one doc per segment, only then throw a timeout: this ensures partial
244+
// results are returned
245+
((ContextIndexSearcher) searcher).throwTimeExceededException();
246+
}
247+
}
248+
// there is a slight chance that no docs are scored for a specific segment.
249+
// other shards / slices will throw the timeout anyway, one is enough.
250+
return max == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : max;
251+
}
252+
253+
@Override
254+
public long cost() {
255+
return maxDoc;
256+
}
257+
};
258+
}
259+
260+
@Override
261+
public Scorer get(long leadCost) {
262+
assert false;
263+
return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.all(context.reader().maxDoc()));
264+
}
265+
266+
@Override
267+
public long cost() {
268+
assert false;
269+
return context.reader().maxDoc();
270+
}
271+
};
272+
}
273+
};
274+
}
275+
276+
@Override
277+
public String toString(String field) {
278+
return "timeout query";
279+
}
280+
281+
@Override
282+
public void visit(QueryVisitor visitor) {
283+
visitor.visitLeaf(this);
284+
}
285+
286+
@Override
287+
public boolean equals(Object obj) {
288+
return sameClassAs(obj);
289+
}
290+
291+
@Override
292+
public int hashCode() {
293+
return classHash();
115294
}
116-
return true;
117-
});
295+
};
296+
}
297+
298+
@Override
299+
protected boolean doEquals(BulkScorerTimeoutQuery other) {
300+
return false;
301+
}
302+
303+
@Override
304+
protected int doHashCode() {
305+
return 0;
306+
}
307+
308+
@Override
309+
public String getWriteableName() {
310+
return "timeout";
311+
}
312+
313+
@Override
314+
public TransportVersion getMinimalSupportedVersion() {
315+
return null;
118316
}
119317
}
120318
}

0 commit comments

Comments
 (0)