Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -330,12 +330,9 @@ tests:
- class: org.elasticsearch.smoketest.MlWithSecurityIT
method: test {yaml=ml/3rd_party_deployment/Test start deployment fails while model download in progress}
issue: https://github.com/elastic/elasticsearch/issues/120814
- class: org.elasticsearch.search.query.QueryPhaseTimeoutTests
method: testScorerTimeoutPoints
issue: https://github.com/elastic/elasticsearch/issues/124140
- class: org.elasticsearch.search.query.QueryPhaseTimeoutTests
method: testScorerTimeoutTerms
issue: https://github.com/elastic/elasticsearch/issues/124141
- class: org.elasticsearch.compute.data.BlockMultiValuedTests
method: testToMask {elementType=BOOLEAN nullAllowed=true}
issue: https://github.com/elastic/elasticsearch/issues/124165
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was caused by a bad merge, I fixed that directly on main with dfe2b24

- class: org.elasticsearch.smoketest.MlWithSecurityIT
method: test {yaml=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable is missing}
issue: https://github.com/elastic/elasticsearch/issues/124168
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ public void tearDown() throws Exception {
}

private static ContextIndexSearcher newContextSearcher(IndexReader reader) throws IOException {
// note that no executor is provided, as this test requires sequential execution
return new ContextIndexSearcher(
reader,
IndexSearcher.getDefaultSimilarity(),
Expand All @@ -125,27 +126,35 @@ private static ContextIndexSearcher newContextSearcher(IndexReader reader) throw
);
}

public void testScorerTimeoutTerms() throws IOException {
/**
* Test that a timeout is appropriately handled when the (exitable) directory reader raises it while loading terms enum
* as the scorer supplier is requested.
*/
public void testScorerSupplierTimeoutTerms() throws IOException {
assumeTrue("Test requires more than one segment", reader.leaves().size() > 1);
int size = randomBoolean() ? 0 : randomIntBetween(100, 500);
scorerTimeoutTest(size, context -> {
scorerSupplierTimeoutTest(size, context -> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been reading TimeoutQuery and MatchAllWeight defined in this test class but I'm still not sure how the function passed in triggers the timeout. At least that's what I'm supposing its doing given its name. Maybe its possible to explain this for future readers somehow?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I found something in ContextIndexSearcher#getTimeoutCheck that pulls things from the search context that we override in "createSearchContextWithTimeout", is that the mechanism? Not that obvious unfortunately if it is.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the new javadocs would help explaining this.

Test that a timeout is appropriately handled when the (exitable) directory reader raises it while loading terms enum as the scorer is retrieved from the scorer supplier.

The idea of these tests is that a timeout can be raised from different places: either because we wrap the bulk scorer with a cancellable one, or via the exitable directory reader that wraps the original reader. Because a timeout can be raised from different places, it's easy to miss and to forget handling the corresponding exception. The reason why the timeout is thrown is because we flip shouldTimeout in the search context. I am adding a bit more comments.

final TermsEnum termsEnum = context.reader().terms("field").iterator();
termsEnum.next();
});
}

public void testScorerTimeoutPoints() throws IOException {
/**
* Test that a timeout is appropriately handled when the (exitable) directory reader raises it while loading points
* as the scorer supplier is requested.
*/
public void testScorerSupplierTimeoutPoints() throws IOException {
assumeTrue("Test requires more than one segment", reader.leaves().size() > 1);
int size = randomBoolean() ? 0 : randomIntBetween(100, 500);
scorerTimeoutTest(size, context -> {
scorerSupplierTimeoutTest(size, context -> {
PointValues pointValues = context.reader().getPointValues("long");
pointValues.size();
});
}

private void scorerTimeoutTest(int size, CheckedConsumer<LeafReaderContext, IOException> timeoutTrigger) throws IOException {
private void scorerSupplierTimeoutTest(int size, CheckedConsumer<LeafReaderContext, IOException> timeoutTrigger) throws IOException {
{
TimeoutQuery query = newMatchAllScorerTimeoutQuery(timeoutTrigger, false);
TimeoutQuery query = newMatchAllScorerSupplierTimeoutQuery(timeoutTrigger, false);
try (SearchContext context = createSearchContext(query, size)) {
QueryPhase.executeQuery(context);
assertFalse(context.queryResult().searchTimedOut());
Expand All @@ -154,18 +163,20 @@ private void scorerTimeoutTest(int size, CheckedConsumer<LeafReaderContext, IOEx
}
}
{
TimeoutQuery query = newMatchAllScorerTimeoutQuery(timeoutTrigger, true);
TimeoutQuery query = newMatchAllScorerSupplierTimeoutQuery(timeoutTrigger, true);
try (SearchContext context = createSearchContextWithTimeout(query, size)) {
QueryPhase.executeQuery(context);
assertTrue(context.queryResult().searchTimedOut());
int firstSegmentMaxDoc = reader.leaves().get(0).reader().maxDoc();
assertEquals(Math.min(2048, firstSegmentMaxDoc), context.queryResult().topDocs().topDocs.totalHits.value());
// we are artificially raising the timeout when pulling the scorer supplier.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here: I've been trying to grok this but am most likely missing where the timeout is happening and why.

// We score the entire first segment, then trigger timeout.
assertEquals(firstSegmentMaxDoc, context.queryResult().topDocs().topDocs.totalHits.value());
assertEquals(Math.min(size, firstSegmentMaxDoc), context.queryResult().topDocs().topDocs.scoreDocs.length);
}
}
}

private static TimeoutQuery newMatchAllScorerTimeoutQuery(
private static TimeoutQuery newMatchAllScorerSupplierTimeoutQuery(
CheckedConsumer<LeafReaderContext, IOException> timeoutTrigger,
boolean isTimeoutExpected
) {
Expand All @@ -177,6 +188,7 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
// trigger the timeout as soon as the scorer supplier is request for the second segment
if (firstSegment == false && isTimeoutExpected) {
shouldTimeout = true;
}
Expand All @@ -190,6 +202,96 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
};
}

/**
* Test that a timeout is appropriately handled when the (exitable) directory reader raises it while loading terms enum
* as the scorer is retrieved from the scorer supplier
*/
public void testScorerGetTimeoutTerms() throws IOException {
assumeTrue("Test requires more than one segment", reader.leaves().size() > 1);
int size = randomBoolean() ? 0 : randomIntBetween(100, 500);
scorerGetTimeoutTest(size, context -> {
final TermsEnum termsEnum = context.reader().terms("field").iterator();
termsEnum.next();
});
}

/**
* Test that a timeout is appropriately handled when the (exitable) directory reader raises it while loading points
* as the scorer is retrieved from the scorer supplier
*/
public void testScorerGetTimeoutPoints() throws IOException {
assumeTrue("Test requires more than one segment", reader.leaves().size() > 1);
int size = randomBoolean() ? 0 : randomIntBetween(100, 500);
scorerGetTimeoutTest(size, context -> {
PointValues pointValues = context.reader().getPointValues("long");
pointValues.size();
});
}

private void scorerGetTimeoutTest(int size, CheckedConsumer<LeafReaderContext, IOException> timeoutTrigger) throws IOException {
{
TimeoutQuery query = newMatchAllScorerGetTimeoutQuery(timeoutTrigger, false);
try (SearchContext context = createSearchContext(query, size)) {
QueryPhase.executeQuery(context);
assertFalse(context.queryResult().searchTimedOut());
assertEquals(numDocs, context.queryResult().topDocs().topDocs.totalHits.value());
assertEquals(size, context.queryResult().topDocs().topDocs.scoreDocs.length);
}
}
{
TimeoutQuery query = newMatchAllScorerGetTimeoutQuery(timeoutTrigger, true);
try (SearchContext context = createSearchContextWithTimeout(query, size)) {
QueryPhase.executeQuery(context);
assertTrue(context.queryResult().searchTimedOut());
int firstSegmentMaxDoc = reader.leaves().get(0).reader().maxDoc();
// we are artificially raising the timeout when pulling the scorer supplier.
// We score the entire first segment, then trigger timeout.
assertEquals(firstSegmentMaxDoc, context.queryResult().topDocs().topDocs.totalHits.value());
assertEquals(Math.min(size, firstSegmentMaxDoc), context.queryResult().topDocs().topDocs.scoreDocs.length);
}
}
}

private static TimeoutQuery newMatchAllScorerGetTimeoutQuery(
CheckedConsumer<LeafReaderContext, IOException> timeoutTrigger,
boolean isTimeoutExpected
) {
return new TimeoutQuery() {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
return new MatchAllWeight(this, boost, scoreMode) {
boolean firstSegment = true;

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = super.scorerSupplier(context);
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It took me a bit to see the difference between this and the previous override of "ScorerSupplier scorerSupplier(LeafReaderContext context) earlier in this test (L192), maybe its worth adding some more comments around whats different here and why this deserves a second pair of tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make a suggestion? I am not sure what I can do to clarify this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two things that would have helped me understand this test quicker:

  • understand better how the timeout is actually triggered. I think I now understand that the TimeoutQuery.shouldTimeout flag controls this via the modified TestSearchContext that depending on the flags value returns 0/1 in the overwritten "getRelativeTimeInMillis". Mentioning that right next to the flag or maybe somewhere in "createSearchContextWithTimeout" would have helped me find my way quicker I think
  • realizing quicker that the getting the ScoreSupplier in "newMatchAllScorerSupplierTimeoutQuery" triggers the timeout vs. getting the Scorer in "newMatchAllScorerGetTimeoutQuery". Its obvious after you understand the trigger a bit and looked hard enough, but method comments on both of these helpers that point out the difference would have helped a lot. e.g.
// create a TimeoutQuery that triggers the timeout when pulling the scorer supplier after the first segment.
// Note this is different from "newMatchAllScorerGetTimeoutQuery" where we trigger the timeout when we
// get the Scorer from the ScoreSupplier
private static TimeoutQuery newMatchAllScorerSupplierTimeoutQuery(
[...]

and vice-versa. I might have this still wrong because I'm not 100% sure about the details here.

I also just realized the inline comments in the second parts "scorerGetTimeoutTest()" and "scorerSupplierTimeoutTest()" are similar, I think the former would benefit from a slight adjustment to underline the differences as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought this was made clear by the javadocs already:

as the scorer is retrieved from the scorer supplier

as opposed to

as the scorer supplier is requested

methods are also named that way to suggest the difference around pulling the supplier as opposed to getting the scorer from the supplier. I totally see that this is tricky.

// trigger the timeout as soon as the scorer is requested for the second segment
if (firstSegment == false && isTimeoutExpected) {
shouldTimeout = true;
}
timeoutTrigger.accept(context);
assert shouldTimeout == false : "should have already timed out";
firstSegment = false;
return scorerSupplier.get(leadCost);
}

@Override
public long cost() {
return scorerSupplier.cost();
}
};
}
};
}
};
}

/**
* Test that a timeout is appropriately handled while bulk scoring, via cancellable bulk scorer
*/
public void testBulkScorerTimeout() throws IOException {
int size = randomBoolean() ? 0 : randomIntBetween(100, 500);
{
Expand All @@ -207,6 +309,8 @@ public void testBulkScorerTimeout() throws IOException {
QueryPhase.executeQuery(context);
assertTrue(context.queryResult().searchTimedOut());
int firstSegmentMaxDoc = reader.leaves().get(0).reader().maxDoc();
// See CancellableBulkScorer#INITIAL_INTERVAL for the source of 2048: we always score the first
// batch of up to 2048 docs, and only then raise the timeout
assertEquals(Math.min(2048, firstSegmentMaxDoc), context.queryResult().topDocs().topDocs.totalHits.value());
assertEquals(Math.min(size, firstSegmentMaxDoc), context.queryResult().topDocs().topDocs.scoreDocs.length);
}
Expand All @@ -233,7 +337,7 @@ public long cost() {
}

@Override
public BulkScorer bulkScorer() throws IOException {
public BulkScorer bulkScorer() {
final float score = score();
final int maxDoc = context.reader().maxDoc();
return new BulkScorer() {
Expand All @@ -251,7 +355,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
}
if (timeoutExpected) {
// timeout after collecting the first batch of documents from the 1st segment, or the entire 1st
// segment
// segment if max > firstSegment.maxDoc()
shouldTimeout = true;
}
return max == maxDoc ? DocIdSetIterator.NO_MORE_DOCS : max;
Expand All @@ -274,6 +378,9 @@ private TestSearchContext createSearchContextWithTimeout(TimeoutQuery query, int
TestSearchContext context = new TestSearchContext(null, indexShard, newContextSearcher(reader)) {
@Override
public long getRelativeTimeInMillis() {
// this controls whether a timeout is raised or not. We abstract time away by pretending that the clock stops
// when a timeout is not expected. The tiniest increment to relative time in millis triggers a timeout.
// See QueryPhase#getTimeoutCheck
return query.shouldTimeout ? 1L : 0L;
}
};
Expand Down