Skip to content

Commit ddb9413

Browse files
authored
[8.x] Handle all exceptions in data nodes can match (#117469) (#118533)
* Handle all exceptions in data nodes can match (#117469) During the can match phase, prior to the query phase, we may have exceptions that are returned back to the coordinating node, handled gracefully as if the shard returned canMatch=true. During the query phase, we perform an additional rewrite and can match phase to eventually shortcut the query phase for the shard. That needs to handle exceptions as well. Currently, an exception there causes shard failures, while we should rather go ahead and execute the query on the shard. Instead of adding another try catch on consumers code, this commit adds exception handling to the method itself so that it can no longer throw exceptions and similar mistakes can no longer be made in the future. At the same time, this commit makes the can match method more easily testable without requiring a full-blown SearchService instance. Closes #104994 * fix compile
1 parent bde2b85 commit ddb9413

File tree

9 files changed

+3521
-3133
lines changed

9 files changed

+3521
-3133
lines changed

docs/changelog/117469.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 117469
2+
summary: Handle exceptions in query phase can match
3+
area: Search
4+
type: bug
5+
issues:
6+
- 104994

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 126 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@
150150
import java.util.concurrent.atomic.AtomicBoolean;
151151
import java.util.concurrent.atomic.AtomicInteger;
152152
import java.util.concurrent.atomic.AtomicLong;
153+
import java.util.function.BiFunction;
154+
import java.util.function.Function;
153155
import java.util.function.LongSupplier;
154156
import java.util.function.Supplier;
155157

@@ -559,16 +561,17 @@ public void executeQueryPhase(ShardSearchRequest request, SearchShardTask task,
559561
// check if we can shortcut the query phase entirely.
560562
if (orig.canReturnNullResponseIfMatchNoDocs()) {
561563
assert orig.scroll() == null;
562-
final CanMatchShardResponse canMatchResp;
563-
try {
564-
ShardSearchRequest clone = new ShardSearchRequest(orig);
565-
canMatchResp = canMatch(clone, false);
566-
} catch (Exception exc) {
567-
l.onFailure(exc);
568-
return;
569-
}
564+
ShardSearchRequest clone = new ShardSearchRequest(orig);
565+
CanMatchContext canMatchContext = new CanMatchContext(
566+
clone,
567+
indicesService::indexServiceSafe,
568+
this::findReaderContext,
569+
defaultKeepAlive,
570+
maxKeepAlive
571+
);
572+
CanMatchShardResponse canMatchResp = canMatch(canMatchContext, false);
570573
if (canMatchResp.canMatch() == false) {
571-
l.onResponse(QuerySearchResult.nullInstance());
574+
listener.onResponse(QuerySearchResult.nullInstance());
572575
return;
573576
}
574577
}
@@ -1201,25 +1204,37 @@ public void freeAllScrollContexts() {
12011204
}
12021205

12031206
private long getKeepAlive(ShardSearchRequest request) {
1207+
return getKeepAlive(request, defaultKeepAlive, maxKeepAlive);
1208+
}
1209+
1210+
private static long getKeepAlive(ShardSearchRequest request, long defaultKeepAlive, long maxKeepAlive) {
12041211
if (request.scroll() != null) {
1205-
return getScrollKeepAlive(request.scroll());
1212+
return getScrollKeepAlive(request.scroll(), defaultKeepAlive, maxKeepAlive);
12061213
} else if (request.keepAlive() != null) {
1207-
checkKeepAliveLimit(request.keepAlive().millis());
1214+
checkKeepAliveLimit(request.keepAlive().millis(), maxKeepAlive);
12081215
return request.keepAlive().getMillis();
12091216
} else {
12101217
return request.readerId() == null ? defaultKeepAlive : -1;
12111218
}
12121219
}
12131220

12141221
private long getScrollKeepAlive(Scroll scroll) {
1222+
return getScrollKeepAlive(scroll, defaultKeepAlive, maxKeepAlive);
1223+
}
1224+
1225+
private static long getScrollKeepAlive(Scroll scroll, long defaultKeepAlive, long maxKeepAlive) {
12151226
if (scroll != null && scroll.keepAlive() != null) {
1216-
checkKeepAliveLimit(scroll.keepAlive().millis());
1227+
checkKeepAliveLimit(scroll.keepAlive().millis(), maxKeepAlive);
12171228
return scroll.keepAlive().getMillis();
12181229
}
12191230
return defaultKeepAlive;
12201231
}
12211232

12221233
private void checkKeepAliveLimit(long keepAlive) {
1234+
checkKeepAliveLimit(keepAlive, maxKeepAlive);
1235+
}
1236+
1237+
private static void checkKeepAliveLimit(long keepAlive, long maxKeepAlive) {
12231238
if (keepAlive > maxKeepAlive) {
12241239
throw new IllegalArgumentException(
12251240
"Keep alive for request ("
@@ -1678,6 +1693,7 @@ public void canMatch(CanMatchNodeRequest request, ActionListener<CanMatchNodeRes
16781693
final List<CanMatchNodeResponse.ResponseOrFailure> responses = new ArrayList<>(shardLevelRequests.size());
16791694
for (var shardLevelRequest : shardLevelRequests) {
16801695
try {
1696+
// TODO remove the exception handling as it's now in canMatch itself
16811697
responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatch(request.createShardSearchRequest(shardLevelRequest))));
16821698
} catch (Exception e) {
16831699
responses.add(new CanMatchNodeResponse.ResponseOrFailure(e));
@@ -1689,82 +1705,145 @@ public void canMatch(CanMatchNodeRequest request, ActionListener<CanMatchNodeRes
16891705
/**
16901706
* This method uses a lightweight searcher without wrapping (i.e., not open a full reader on frozen indices) to rewrite the query
16911707
* to check if the query can match any documents. This method can have false positives while if it returns {@code false} the query
1692-
* won't match any documents on the current shard.
1708+
* won't match any documents on the current shard. Exceptions are handled within the method, and never re-thrown.
16931709
*/
1694-
public CanMatchShardResponse canMatch(ShardSearchRequest request) throws IOException {
1695-
return canMatch(request, true);
1710+
public CanMatchShardResponse canMatch(ShardSearchRequest request) {
1711+
CanMatchContext canMatchContext = new CanMatchContext(
1712+
request,
1713+
indicesService::indexServiceSafe,
1714+
this::findReaderContext,
1715+
defaultKeepAlive,
1716+
maxKeepAlive
1717+
);
1718+
return canMatch(canMatchContext, true);
16961719
}
16971720

1698-
private CanMatchShardResponse canMatch(ShardSearchRequest request, boolean checkRefreshPending) throws IOException {
1699-
assert request.searchType() == SearchType.QUERY_THEN_FETCH : "unexpected search type: " + request.searchType();
1721+
static class CanMatchContext {
1722+
private final ShardSearchRequest request;
1723+
private final Function<Index, IndexService> indexServiceLookup;
1724+
private final BiFunction<ShardSearchContextId, TransportRequest, ReaderContext> findReaderContext;
1725+
private final long defaultKeepAlive;
1726+
private final long maxKeepAlive;
1727+
1728+
private IndexService indexService;
1729+
1730+
CanMatchContext(
1731+
ShardSearchRequest request,
1732+
Function<Index, IndexService> indexServiceLookup,
1733+
BiFunction<ShardSearchContextId, TransportRequest, ReaderContext> findReaderContext,
1734+
long defaultKeepAlive,
1735+
long maxKeepAlive
1736+
) {
1737+
this.request = request;
1738+
this.indexServiceLookup = indexServiceLookup;
1739+
this.findReaderContext = findReaderContext;
1740+
this.defaultKeepAlive = defaultKeepAlive;
1741+
this.maxKeepAlive = maxKeepAlive;
1742+
}
1743+
1744+
long getKeepAlive() {
1745+
return SearchService.getKeepAlive(request, defaultKeepAlive, maxKeepAlive);
1746+
}
1747+
1748+
ReaderContext findReaderContext() {
1749+
return findReaderContext.apply(request.readerId(), request);
1750+
}
1751+
1752+
QueryRewriteContext getQueryRewriteContext(IndexService indexService) {
1753+
return indexService.newQueryRewriteContext(request::nowInMillis, request.getRuntimeMappings(), request.getClusterAlias());
1754+
}
1755+
1756+
SearchExecutionContext getSearchExecutionContext(Engine.Searcher searcher) {
1757+
return getIndexService().newSearchExecutionContext(
1758+
request.shardId().id(),
1759+
0,
1760+
searcher,
1761+
request::nowInMillis,
1762+
request.getClusterAlias(),
1763+
request.getRuntimeMappings()
1764+
);
1765+
}
1766+
1767+
IndexShard getShard() {
1768+
return getIndexService().getShard(request.shardId().getId());
1769+
}
1770+
1771+
IndexService getIndexService() {
1772+
if (this.indexService == null) {
1773+
this.indexService = indexServiceLookup.apply(request.shardId().getIndex());
1774+
}
1775+
return this.indexService;
1776+
}
1777+
}
1778+
1779+
static CanMatchShardResponse canMatch(CanMatchContext canMatchContext, boolean checkRefreshPending) {
1780+
assert canMatchContext.request.searchType() == SearchType.QUERY_THEN_FETCH
1781+
: "unexpected search type: " + canMatchContext.request.searchType();
17001782
Releasable releasable = null;
17011783
try {
17021784
IndexService indexService;
17031785
final boolean hasRefreshPending;
17041786
final Engine.Searcher canMatchSearcher;
1705-
if (request.readerId() != null) {
1787+
if (canMatchContext.request.readerId() != null) {
17061788
hasRefreshPending = false;
17071789
ReaderContext readerContext;
17081790
Engine.Searcher searcher;
17091791
try {
1710-
readerContext = findReaderContext(request.readerId(), request);
1711-
releasable = readerContext.markAsUsed(getKeepAlive(request));
1792+
readerContext = canMatchContext.findReaderContext();
1793+
releasable = readerContext.markAsUsed(canMatchContext.getKeepAlive());
17121794
indexService = readerContext.indexService();
1713-
if (canMatchAfterRewrite(request, indexService) == false) {
1795+
QueryRewriteContext queryRewriteContext = canMatchContext.getQueryRewriteContext(indexService);
1796+
if (queryStillMatchesAfterRewrite(canMatchContext.request, queryRewriteContext) == false) {
17141797
return new CanMatchShardResponse(false, null);
17151798
}
17161799
searcher = readerContext.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE);
17171800
} catch (SearchContextMissingException e) {
1718-
final String searcherId = request.readerId().getSearcherId();
1801+
final String searcherId = canMatchContext.request.readerId().getSearcherId();
17191802
if (searcherId == null) {
1720-
throw e;
1803+
return new CanMatchShardResponse(true, null);
17211804
}
1722-
indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
1723-
if (canMatchAfterRewrite(request, indexService) == false) {
1805+
if (queryStillMatchesAfterRewrite(
1806+
canMatchContext.request,
1807+
canMatchContext.getQueryRewriteContext(canMatchContext.getIndexService())
1808+
) == false) {
17241809
return new CanMatchShardResponse(false, null);
17251810
}
1726-
IndexShard indexShard = indexService.getShard(request.shardId().getId());
1727-
final Engine.SearcherSupplier searcherSupplier = indexShard.acquireSearcherSupplier();
1811+
final Engine.SearcherSupplier searcherSupplier = canMatchContext.getShard().acquireSearcherSupplier();
17281812
if (searcherId.equals(searcherSupplier.getSearcherId()) == false) {
17291813
searcherSupplier.close();
1730-
throw e;
1814+
return new CanMatchShardResponse(true, null);
17311815
}
17321816
releasable = searcherSupplier;
17331817
searcher = searcherSupplier.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE);
17341818
}
17351819
canMatchSearcher = searcher;
17361820
} else {
1737-
indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
1738-
if (canMatchAfterRewrite(request, indexService) == false) {
1821+
if (queryStillMatchesAfterRewrite(
1822+
canMatchContext.request,
1823+
canMatchContext.getQueryRewriteContext(canMatchContext.getIndexService())
1824+
) == false) {
17391825
return new CanMatchShardResponse(false, null);
17401826
}
1741-
IndexShard indexShard = indexService.getShard(request.shardId().getId());
1742-
boolean needsWaitForRefresh = request.waitForCheckpoint() != UNASSIGNED_SEQ_NO;
1827+
boolean needsWaitForRefresh = canMatchContext.request.waitForCheckpoint() != UNASSIGNED_SEQ_NO;
17431828
// If this request wait_for_refresh behavior, it is safest to assume a refresh is pending. Theoretically,
17441829
// this can be improved in the future by manually checking that the requested checkpoint has already been refresh.
17451830
// However, this will request modifying the engine to surface that information.
1831+
IndexShard indexShard = canMatchContext.getShard();
17461832
hasRefreshPending = needsWaitForRefresh || (indexShard.hasRefreshPending() && checkRefreshPending);
17471833
canMatchSearcher = indexShard.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE);
17481834
}
17491835
try (canMatchSearcher) {
1750-
SearchExecutionContext context = indexService.newSearchExecutionContext(
1751-
request.shardId().id(),
1752-
0,
1753-
canMatchSearcher,
1754-
request::nowInMillis,
1755-
request.getClusterAlias(),
1756-
request.getRuntimeMappings()
1757-
);
1758-
final boolean canMatch = queryStillMatchesAfterRewrite(request, context);
1759-
final MinAndMax<?> minMax;
1836+
SearchExecutionContext context = canMatchContext.getSearchExecutionContext(canMatchSearcher);
1837+
final boolean canMatch = queryStillMatchesAfterRewrite(canMatchContext.request, context);
17601838
if (canMatch || hasRefreshPending) {
1761-
FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(request.source());
1762-
minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
1763-
} else {
1764-
minMax = null;
1839+
FieldSortBuilder sortBuilder = FieldSortBuilder.getPrimaryFieldSortOrNull(canMatchContext.request.source());
1840+
final MinAndMax<?> minMax = sortBuilder != null ? FieldSortBuilder.getMinMaxOrNull(context, sortBuilder) : null;
1841+
return new CanMatchShardResponse(true, minMax);
17651842
}
1766-
return new CanMatchShardResponse(canMatch || hasRefreshPending, minMax);
1843+
return new CanMatchShardResponse(false, null);
17671844
}
1845+
} catch (Exception e) {
1846+
return new CanMatchShardResponse(true, null);
17681847
} finally {
17691848
Releasables.close(releasable);
17701849
}
@@ -1777,15 +1856,6 @@ private CanMatchShardResponse canMatch(ShardSearchRequest request, boolean check
17771856
* {@link MatchNoneQueryBuilder}. This allows us to avoid extra work for example making the shard search active and waiting for
17781857
* refreshes.
17791858
*/
1780-
private static boolean canMatchAfterRewrite(final ShardSearchRequest request, final IndexService indexService) throws IOException {
1781-
final QueryRewriteContext queryRewriteContext = indexService.newQueryRewriteContext(
1782-
request::nowInMillis,
1783-
request.getRuntimeMappings(),
1784-
request.getClusterAlias()
1785-
);
1786-
return queryStillMatchesAfterRewrite(request, queryRewriteContext);
1787-
}
1788-
17891859
@SuppressWarnings("unchecked")
17901860
public static boolean queryStillMatchesAfterRewrite(ShardSearchRequest request, QueryRewriteContext context) throws IOException {
17911861
Rewriteable.rewrite(request.getRewriteable(), context, false);

0 commit comments

Comments
 (0)