Skip to content

Commit f7216d1

Browse files
authored
Fix SearchTimeoutIT (elastic#120390) (elastic#122204)
Two of the timeout tests have been muted for several months. The reason is that we tightened the assertions to cover for partial results being returned, but there were edge cases in which partial results were not actually returned. The timeout used in the test was time dependent, hence when the timeout precisely will be thrown is unpredictable, because we have timeout checks in different places in the codebase, when iterating through the leaves, before scoring any document, or while scoring documents. The edge case that caused failures is a typical timing issue where the initial check for timeout in CancellableBulkScorer already triggers the timeout, before any document has been collected. I made several adjustments to the test to make it more robust: - use index random to index documents, that speeds it up - share indexing across test methods, so that it happens once at the suite level - replace the custom query that triggers a timeout to not be a script query, but rather a lucene query that is not time dependent, and throws a time exceeded exception precisely where we expect it, so that we can test how the system reacts to that. That allows to test that partial results are always returned when a timeout happens while scoring documents, and that partial results are never returned when a timeout happens before we even started to score documents. Closes elastic#98369 Closes elastic#98053
1 parent 9c3c628 commit f7216d1

File tree

1 file changed

+259
-62
lines changed

1 file changed

+259
-62
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/SearchTimeoutIT.java

Lines changed: 259 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -9,112 +9,309 @@
99

1010
package org.elasticsearch.search;
1111

12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.search.BulkScorer;
14+
import org.apache.lucene.search.ConstantScoreScorer;
15+
import org.apache.lucene.search.ConstantScoreWeight;
16+
import org.apache.lucene.search.DocIdSetIterator;
17+
import org.apache.lucene.search.IndexSearcher;
18+
import org.apache.lucene.search.LeafCollector;
19+
import org.apache.lucene.search.Query;
20+
import org.apache.lucene.search.QueryVisitor;
21+
import org.apache.lucene.search.Scorable;
22+
import org.apache.lucene.search.ScoreMode;
23+
import org.apache.lucene.search.Scorer;
24+
import org.apache.lucene.search.Weight;
25+
import org.apache.lucene.util.Bits;
1226
import org.elasticsearch.ElasticsearchException;
13-
import org.elasticsearch.action.search.SearchResponse;
27+
import org.elasticsearch.TransportVersion;
28+
import org.elasticsearch.action.search.SearchRequestBuilder;
29+
import org.elasticsearch.common.io.stream.StreamInput;
30+
import org.elasticsearch.common.io.stream.StreamOutput;
1431
import org.elasticsearch.common.settings.Settings;
1532
import org.elasticsearch.core.TimeValue;
33+
import org.elasticsearch.index.query.AbstractQueryBuilder;
34+
import org.elasticsearch.index.query.QueryBuilder;
35+
import org.elasticsearch.index.query.SearchExecutionContext;
1636
import org.elasticsearch.plugins.Plugin;
17-
import org.elasticsearch.script.MockScriptPlugin;
18-
import org.elasticsearch.script.Script;
19-
import org.elasticsearch.script.ScriptType;
37+
import org.elasticsearch.plugins.SearchPlugin;
2038
import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
2139
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
40+
import org.elasticsearch.search.internal.ContextIndexSearcher;
2241
import org.elasticsearch.test.ESIntegTestCase;
42+
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
43+
import org.elasticsearch.xcontent.XContentBuilder;
2344

45+
import java.io.IOException;
2446
import java.util.Collection;
2547
import java.util.Collections;
26-
import java.util.Map;
48+
import java.util.List;
2749
import java.util.concurrent.TimeUnit;
28-
import java.util.function.Function;
2950

30-
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
31-
import static org.elasticsearch.index.query.QueryBuilders.scriptQuery;
32-
import static org.elasticsearch.search.SearchTimeoutIT.ScriptedTimeoutPlugin.SCRIPT_NAME;
3351
import static org.hamcrest.Matchers.equalTo;
3452
import static org.hamcrest.Matchers.greaterThan;
3553

3654
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE)
55+
@ESIntegTestCase.SuiteScopeTestCase
3756
public class SearchTimeoutIT extends ESIntegTestCase {
3857

3958
@Override
4059
protected Collection<Class<? extends Plugin>> nodePlugins() {
41-
return Collections.singleton(ScriptedTimeoutPlugin.class);
60+
return Collections.singleton(BulkScorerTimeoutQueryPlugin.class);
4261
}
4362

4463
@Override
4564
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
4665
return Settings.builder().put(super.nodeSettings(nodeOrdinal, otherSettings)).build();
4766
}
4867

49-
private void indexDocs() {
50-
for (int i = 0; i < 32; i++) {
51-
prepareIndex("test").setId(Integer.toString(i)).setSource("field", "value").get();
52-
}
53-
refresh("test");
68+
@Override
69+
protected void setupSuiteScopeCluster() throws Exception {
70+
super.setupSuiteScopeCluster();
71+
indexRandom(true, "test", randomIntBetween(20, 50));
5472
}
5573

56-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98369")
57-
public void testTopHitsTimeout() {
58-
indexDocs();
59-
SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
60-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
61-
.get();
62-
assertThat(searchResponse.isTimedOut(), equalTo(true));
63-
assertEquals(0, searchResponse.getShardFailures().length);
64-
assertEquals(0, searchResponse.getFailedShards());
65-
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
66-
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
67-
assertThat(searchResponse.getHits().getTotalHits().value, greaterThan(0L));
68-
assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
74+
public void testTopHitsTimeoutBeforeCollecting() {
75+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
76+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.SECONDS))
77+
.setQuery(new BulkScorerTimeoutQuery(false));
78+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
79+
assertThat(searchResponse.isTimedOut(), equalTo(true));
80+
assertEquals(0, searchResponse.getShardFailures().length);
81+
assertEquals(0, searchResponse.getFailedShards());
82+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
83+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
84+
// timeout happened before we could collect any doc, total hits is 0 and no hits are returned
85+
assertEquals(0, searchResponse.getHits().getTotalHits().value);
86+
assertEquals(0, searchResponse.getHits().getHits().length);
87+
});
6988
}
7089

71-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/98053")
72-
public void testAggsTimeout() {
73-
indexDocs();
74-
SearchResponse searchResponse = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
75-
.setSize(0)
76-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
77-
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"))
78-
.get();
79-
assertThat(searchResponse.isTimedOut(), equalTo(true));
80-
assertEquals(0, searchResponse.getShardFailures().length);
81-
assertEquals(0, searchResponse.getFailedShards());
82-
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
83-
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
84-
assertThat(searchResponse.getHits().getTotalHits().value, greaterThan(0L));
85-
assertEquals(searchResponse.getHits().getHits().length, 0);
86-
StringTerms terms = searchResponse.getAggregations().get("terms");
87-
assertEquals(1, terms.getBuckets().size());
88-
StringTerms.Bucket bucket = terms.getBuckets().get(0);
89-
assertEquals("value", bucket.getKeyAsString());
90-
assertThat(bucket.getDocCount(), greaterThan(0L));
90+
public void testTopHitsTimeoutWhileCollecting() {
91+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
92+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.SECONDS))
93+
.setQuery(new BulkScorerTimeoutQuery(true));
94+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
95+
assertThat(searchResponse.isTimedOut(), equalTo(true));
96+
assertEquals(0, searchResponse.getShardFailures().length);
97+
assertEquals(0, searchResponse.getFailedShards());
98+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
99+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
100+
assertThat(searchResponse.getHits().getTotalHits().value, greaterThan(0L));
101+
assertThat(searchResponse.getHits().getHits().length, greaterThan(0));
102+
});
91103
}
92104

93-
public void testPartialResultsIntolerantTimeout() throws Exception {
94-
prepareIndex("test").setId("1").setSource("field", "value").setRefreshPolicy(IMMEDIATE).get();
105+
public void testAggsTimeoutBeforeCollecting() {
106+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setSize(0)
107+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
108+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
109+
.setQuery(new BulkScorerTimeoutQuery(false))
110+
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"));
111+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
112+
assertThat(searchResponse.isTimedOut(), equalTo(true));
113+
assertEquals(0, searchResponse.getShardFailures().length);
114+
assertEquals(0, searchResponse.getFailedShards());
115+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
116+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
117+
assertEquals(0, searchResponse.getHits().getTotalHits().value);
118+
assertEquals(0, searchResponse.getHits().getHits().length);
119+
StringTerms terms = searchResponse.getAggregations().get("terms");
120+
// timeout happened before we could collect any doc, total hits is 0 and no buckets are returned
121+
assertEquals(0, terms.getBuckets().size());
122+
});
123+
}
95124

125+
public void testAggsTimeoutWhileCollecting() {
126+
SearchRequestBuilder searchRequestBuilder = prepareSearch("test").setSize(0)
127+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
128+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
129+
.setQuery(new BulkScorerTimeoutQuery(true))
130+
.addAggregation(new TermsAggregationBuilder("terms").field("field.keyword"));
131+
ElasticsearchAssertions.assertResponse(searchRequestBuilder, searchResponse -> {
132+
assertThat(searchResponse.isTimedOut(), equalTo(true));
133+
assertEquals(0, searchResponse.getShardFailures().length);
134+
assertEquals(0, searchResponse.getFailedShards());
135+
assertThat(searchResponse.getSuccessfulShards(), greaterThan(0));
136+
assertEquals(searchResponse.getSuccessfulShards(), searchResponse.getTotalShards());
137+
assertThat(searchResponse.getHits().getTotalHits().value, greaterThan(0L));
138+
assertEquals(0, searchResponse.getHits().getHits().length);
139+
StringTerms terms = searchResponse.getAggregations().get("terms");
140+
assertEquals(1, terms.getBuckets().size());
141+
StringTerms.Bucket bucket = terms.getBuckets().get(0);
142+
assertEquals("value", bucket.getKeyAsString());
143+
assertThat(bucket.getDocCount(), greaterThan(0L));
144+
});
145+
}
146+
147+
public void testPartialResultsIntolerantTimeoutBeforeCollecting() {
96148
ElasticsearchException ex = expectThrows(
97149
ElasticsearchException.class,
98-
prepareSearch("test").setTimeout(new TimeValue(10, TimeUnit.MILLISECONDS))
99-
.setQuery(scriptQuery(new Script(ScriptType.INLINE, "mockscript", SCRIPT_NAME, Collections.emptyMap())))
150+
prepareSearch("test")
151+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
152+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
153+
.setQuery(new BulkScorerTimeoutQuery(false))
100154
.setAllowPartialSearchResults(false) // this line causes timeouts to report failures
101155
);
102156
assertTrue(ex.toString().contains("Time exceeded"));
157+
assertEquals(504, ex.status().getStatus());
103158
}
104159

105-
public static class ScriptedTimeoutPlugin extends MockScriptPlugin {
106-
static final String SCRIPT_NAME = "search_timeout";
160+
public void testPartialResultsIntolerantTimeoutWhileCollecting() {
161+
ElasticsearchException ex = expectThrows(
162+
ElasticsearchException.class,
163+
prepareSearch("test")
164+
// setting the timeout is necessary only because we check that if a TimeExceededException is thrown, a timeout was set
165+
.setTimeout(new TimeValue(10, TimeUnit.SECONDS))
166+
.setQuery(new BulkScorerTimeoutQuery(true))
167+
.setAllowPartialSearchResults(false) // this line causes timeouts to report failures
168+
);
169+
assertTrue(ex.toString().contains("Time exceeded"));
170+
assertEquals(504, ex.status().getStatus());
171+
}
172+
173+
public static final class BulkScorerTimeoutQueryPlugin extends Plugin implements SearchPlugin {
174+
@Override
175+
public List<QuerySpec<?>> getQueries() {
176+
return Collections.singletonList(new QuerySpec<QueryBuilder>("timeout", BulkScorerTimeoutQuery::new, parser -> {
177+
throw new UnsupportedOperationException();
178+
}));
179+
}
180+
}
181+
182+
/**
183+
* Query builder that produces a Lucene Query which throws a
184+
* {@link org.elasticsearch.search.internal.ContextIndexSearcher.TimeExceededException} before or while scoring documents.
185+
* This helps make this test not time dependent, otherwise it would be unpredictable when exactly the timeout happens, which is
186+
* rather important if we want to test that we are able to return partial results on timeout.
187+
*/
188+
public static final class BulkScorerTimeoutQuery extends AbstractQueryBuilder<BulkScorerTimeoutQuery> {
189+
190+
private final boolean partialResults;
191+
192+
BulkScorerTimeoutQuery(boolean partialResults) {
193+
this.partialResults = partialResults;
194+
}
195+
196+
BulkScorerTimeoutQuery(StreamInput in) throws IOException {
197+
super(in);
198+
this.partialResults = in.readBoolean();
199+
}
200+
201+
@Override
202+
protected void doWriteTo(StreamOutput out) throws IOException {
203+
out.writeBoolean(partialResults);
204+
}
205+
206+
@Override
207+
protected void doXContent(XContentBuilder builder, Params params) {}
107208

108209
@Override
109-
public Map<String, Function<Map<String, Object>, Object>> pluginScripts() {
110-
return Collections.singletonMap(SCRIPT_NAME, params -> {
111-
try {
112-
Thread.sleep(500);
113-
} catch (InterruptedException e) {
114-
throw new RuntimeException(e);
210+
protected Query doToQuery(SearchExecutionContext context) {
211+
return new Query() {
212+
@Override
213+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
214+
return new ConstantScoreWeight(this, boost) {
215+
@Override
216+
public BulkScorer bulkScorer(LeafReaderContext context) {
217+
if (partialResults == false) {
218+
((ContextIndexSearcher) searcher).throwTimeExceededException();
219+
}
220+
final int maxDoc = context.reader().maxDoc();
221+
return new BulkScorer() {
222+
@Override
223+
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
224+
max = Math.min(max, maxDoc);
225+
for (int doc = min; doc < max; ++doc) {
226+
if (acceptDocs == null || acceptDocs.get(doc)) {
227+
int docId = doc;
228+
collector.setScorer(new Scorable() {
229+
@Override
230+
public float score() {
231+
return 1f;
232+
}
233+
234+
@Override
235+
public int docID() {
236+
return docId;
237+
}
238+
});
239+
collector.collect(doc);
240+
// collect one doc per segment, only then throw a timeout: this ensures partial
241+
// results are returned
242+
((ContextIndexSearcher) searcher).throwTimeExceededException();
243+
}
244+
}
245+
// there is a slight chance that no docs are scored for a specific segment.
246+
// other shards / slices will throw the timeout anyway, one is enough.
247+
return max == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : max;
248+
}
249+
250+
@Override
251+
public long cost() {
252+
return maxDoc;
253+
}
254+
};
255+
}
256+
257+
@Override
258+
public Scorer scorer(LeafReaderContext leafReaderContext) {
259+
assert false;
260+
return new ConstantScoreScorer(
261+
this,
262+
score(),
263+
scoreMode,
264+
DocIdSetIterator.all(leafReaderContext.reader().maxDoc())
265+
);
266+
}
267+
268+
@Override
269+
public boolean isCacheable(LeafReaderContext ctx) {
270+
return false;
271+
}
272+
};
273+
}
274+
275+
@Override
276+
public String toString(String field) {
277+
return "timeout query";
278+
}
279+
280+
@Override
281+
public void visit(QueryVisitor visitor) {
282+
visitor.visitLeaf(this);
283+
}
284+
285+
@Override
286+
public boolean equals(Object obj) {
287+
return sameClassAs(obj);
288+
}
289+
290+
@Override
291+
public int hashCode() {
292+
return classHash();
115293
}
116-
return true;
117-
});
294+
};
295+
}
296+
297+
@Override
298+
protected boolean doEquals(BulkScorerTimeoutQuery other) {
299+
return false;
300+
}
301+
302+
@Override
303+
protected int doHashCode() {
304+
return 0;
305+
}
306+
307+
@Override
308+
public String getWriteableName() {
309+
return "timeout";
310+
}
311+
312+
@Override
313+
public TransportVersion getMinimalSupportedVersion() {
314+
return null;
118315
}
119316
}
120317
}

0 commit comments

Comments
 (0)