Skip to content

Commit cb04885

Browse files
Expand the lifecycle of the AggregationContext (#94023)
Relates to #89437. This PR enables ref counting on QuerySearchResult and moves responsibility for releasing the BigArrays used during aggregation collection to QuerySearchResult. This means the collection-time circuit breakers will not be cleaned up until we have serialized the aggregations. The AggregationContext currently manages all of the memory used by aggregations. Rather than change that, this PR extends the life cycle of the AggregationContext so that it isn't closed until the QuerySearchResult is closed, at which point we have serialized the aggregation information back to the coordinating node. --------- Co-authored-by: Elastic Machine <[email protected]>
1 parent dd968f5 commit cb04885

File tree

9 files changed

+253
-26
lines changed

9 files changed

+253
-26
lines changed

server/src/main/java/org/elasticsearch/index/SearchSlowLog.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ private static Map<String, Object> prepareMap(SearchContext context, long tookIn
189189
messageFields.put("elasticsearch.slowlog.message", context.indexShard().shardId());
190190
messageFields.put("elasticsearch.slowlog.took", TimeValue.timeValueNanos(tookInNanos).toString());
191191
messageFields.put("elasticsearch.slowlog.took_millis", TimeUnit.NANOSECONDS.toMillis(tookInNanos));
192-
if (context.queryResult().getTotalHits() != null) {
193-
messageFields.put("elasticsearch.slowlog.total_hits", context.queryResult().getTotalHits());
192+
if (context.getTotalHits() != null) {
193+
messageFields.put("elasticsearch.slowlog.total_hits", context.getTotalHits());
194194
} else {
195195
messageFields.put("elasticsearch.slowlog.total_hits", "-1");
196196
}

server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.elasticsearch.action.search.SearchType;
2121
import org.elasticsearch.common.lucene.search.Queries;
2222
import org.elasticsearch.core.Nullable;
23+
import org.elasticsearch.core.Releasable;
2324
import org.elasticsearch.core.TimeValue;
2425
import org.elasticsearch.index.IndexService;
2526
import org.elasticsearch.index.IndexSettings;
@@ -178,6 +179,7 @@ public void addFetchResult() {
178179
@Override
179180
public void addQueryResult() {
180181
this.queryResult = new QuerySearchResult(this.readerContext.id(), this.shardTarget, this.request);
182+
addReleasable(queryResult::decRef);
181183
}
182184

183185
@Override
@@ -713,6 +715,10 @@ public QuerySearchResult queryResult() {
713715
return queryResult;
714716
}
715717

718+
public void addQuerySearchResultReleasable(Releasable releasable) {
719+
queryResult.addReleasable(releasable);
720+
}
721+
716722
@Override
717723
public TotalHits getTotalHits() {
718724
if (queryResult != null) {

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,8 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT
447447
try (
448448
Releasable scope = tracer.withScope(task);
449449
Releasable ignored = readerContext.markAsUsed(getKeepAlive(request));
450-
SearchContext context = createContext(readerContext, request, task, true)
450+
SearchContext context = createContext(readerContext, request, task, ResultsType.DFS, false)
451451
) {
452-
context.addDfsResult();
453452
dfsPhase.execute(context);
454453
return context.dfsResult();
455454
} catch (Exception e) {
@@ -623,15 +622,19 @@ private static <T> void runAsync(Executor executor, CheckedSupplier<T, Exception
623622
executor.execute(ActionRunnable.supply(listener, executable::get));
624623
}
625624

625+
/**
626+
* The returned {@link SearchPhaseResult} will have had its ref count incremented by this method.
627+
* It is the responsibility of the caller to ensure that the ref count is correctly decremented
628+
* when the object is no longer needed.
629+
*/
626630
private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchShardTask task) throws Exception {
627631
final ReaderContext readerContext = createOrGetReaderContext(request);
628632
try (
629633
Releasable scope = tracer.withScope(task);
630634
Releasable ignored = readerContext.markAsUsed(getKeepAlive(request));
631-
SearchContext context = createContext(readerContext, request, task, true)
635+
SearchContext context = createContext(readerContext, request, task, ResultsType.QUERY, true)
632636
) {
633637
tracer.startTrace("executeQueryPhase", Map.of());
634-
context.addQueryResult();
635638
final long afterQueryTime;
636639
try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context)) {
637640
loadOrExecuteQueryPhase(request, context);
@@ -643,6 +646,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh
643646
tracer.stopTrace();
644647
}
645648
if (request.numberOfShards() == 1) {
649+
// we already have query results, but we can run fetch at the same time
646650
context.addFetchResult();
647651
return executeFetchPhase(readerContext, context, afterQueryTime);
648652
} else {
@@ -651,6 +655,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh
651655
final RescoreDocIds rescoreDocIds = context.rescoreDocIds();
652656
context.queryResult().setRescoreDocIds(rescoreDocIds);
653657
readerContext.setRescoreDocIds(rescoreDocIds);
658+
context.queryResult().incRef();
654659
return context.queryResult();
655660
}
656661
} catch (Exception e) {
@@ -678,6 +683,7 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon
678683
}
679684
executor.success();
680685
}
686+
// This will incRef the QuerySearchResult when it gets created
681687
return new QueryFetchSearchResult(context.queryResult(), context.fetchResult());
682688
}
683689

@@ -698,15 +704,15 @@ public void executeQueryPhase(
698704
runAsync(getExecutor(readerContext.indexShard()), () -> {
699705
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
700706
try (
701-
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, false);
707+
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, false);
702708
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext)
703709
) {
704-
searchContext.addQueryResult();
705710
searchContext.searcher().setAggregatedDfs(readerContext.getAggregatedDfs(null));
706711
processScroll(request, readerContext, searchContext);
707712
QueryPhase.execute(searchContext);
708713
executor.success();
709714
readerContext.setRescoreDocIds(searchContext.rescoreDocIds());
715+
// ScrollQuerySearchResult will incRef the QuerySearchResult when it gets constructed.
710716
return new ScrollQuerySearchResult(searchContext.queryResult(), searchContext.shardTarget());
711717
} catch (Exception e) {
712718
logger.trace("Query phase failed", e);
@@ -716,17 +722,21 @@ public void executeQueryPhase(
716722
}, wrapFailureListener(listener, readerContext, markAsUsed));
717723
}
718724

725+
/**
726+
* The {@link QuerySearchResult} passed to the listener will have had its ref count incremented by this method.
727+
* It is the responsibility of the caller to ensure that the ref count is correctly decremented
728+
* when the object is no longer needed.
729+
*/
719730
public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, ActionListener<QuerySearchResult> listener) {
720731
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest());
721732
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest());
722733
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
723734
runAsync(getExecutor(readerContext.indexShard()), () -> {
724735
readerContext.setAggregatedDfs(request.dfs());
725736
try (
726-
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, true);
737+
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, true);
727738
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext)
728739
) {
729-
searchContext.addQueryResult();
730740
searchContext.searcher().setAggregatedDfs(request.dfs());
731741
QueryPhase.execute(searchContext);
732742
if (searchContext.queryResult().hasSearchContext() == false && readerContext.singleSession()) {
@@ -739,6 +749,7 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task,
739749
final RescoreDocIds rescoreDocIds = searchContext.rescoreDocIds();
740750
searchContext.queryResult().setRescoreDocIds(rescoreDocIds);
741751
readerContext.setRescoreDocIds(rescoreDocIds);
752+
searchContext.queryResult().incRef();
742753
return searchContext.queryResult();
743754
} catch (Exception e) {
744755
assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e);
@@ -779,10 +790,9 @@ public void executeFetchPhase(
779790
runAsync(getExecutor(readerContext.indexShard()), () -> {
780791
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
781792
try (
782-
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, false);
793+
SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.FETCH, false);
783794
SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(searchContext)
784795
) {
785-
searchContext.addFetchResult();
786796
searchContext.assignRescoreDocIds(readerContext.getRescoreDocIds(null));
787797
searchContext.searcher().setAggregatedDfs(readerContext.getAggregatedDfs(null));
788798
processScroll(request, readerContext, searchContext);
@@ -805,8 +815,7 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A
805815
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest());
806816
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
807817
runAsync(getExecutor(readerContext.indexShard()), () -> {
808-
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, false)) {
809-
searchContext.addFetchResult();
818+
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.FETCH, false)) {
810819
if (request.lastEmittedDoc() != null) {
811820
searchContext.scrollContext().lastEmittedDoc = request.lastEmittedDoc();
812821
}
@@ -983,10 +992,12 @@ protected SearchContext createContext(
983992
ReaderContext readerContext,
984993
ShardSearchRequest request,
985994
SearchShardTask task,
995+
ResultsType resultsType,
986996
boolean includeAggregations
987997
) throws IOException {
988998
checkCancelled(task);
989999
final DefaultSearchContext context = createSearchContext(readerContext, request, defaultSearchTimeout);
1000+
resultsType.addResultsObject(context);
9901001
try {
9911002
if (request.scroll() != null) {
9921003
context.scrollContext().scroll = request.scroll();
@@ -1246,7 +1257,7 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
12461257
enableRewriteAggsToFilterByFilter,
12471258
source.aggregations().isInSortOrderExecutionRequired()
12481259
);
1249-
context.addReleasable(aggContext);
1260+
context.addQuerySearchResultReleasable(aggContext);
12501261
try {
12511262
AggregatorFactories factories = source.aggregations().build(aggContext, null);
12521263
context.aggregations(new SearchContextAggregations(factories));
@@ -1447,6 +1458,41 @@ public ResponseCollectorService getResponseCollectorService() {
14471458
return this.responseCollectorService;
14481459
}
14491460

1461+
/**
1462+
* Used to indicate which result object should be instantiated when creating a search context
1463+
*/
1464+
enum ResultsType {
1465+
DFS {
1466+
@Override
1467+
void addResultsObject(SearchContext context) {
1468+
context.addDfsResult();
1469+
}
1470+
},
1471+
QUERY {
1472+
@Override
1473+
void addResultsObject(SearchContext context) {
1474+
context.addQueryResult();
1475+
}
1476+
},
1477+
FETCH {
1478+
@Override
1479+
void addResultsObject(SearchContext context) {
1480+
context.addFetchResult();
1481+
}
1482+
},
1483+
/**
1484+
* None is intended for use in testing, when we might not progress all the way to generating results
1485+
*/
1486+
NONE {
1487+
@Override
1488+
void addResultsObject(SearchContext context) {
1489+
// this space intentionally left blank
1490+
}
1491+
};
1492+
1493+
abstract void addResultsObject(SearchContext context);
1494+
}
1495+
14501496
class Reaper implements Runnable {
14511497
@Override
14521498
public void run() {

server/src/main/java/org/elasticsearch/search/fetch/QueryFetchSearchResult.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.core.AbstractRefCounted;
14+
import org.elasticsearch.core.RefCounted;
1315
import org.elasticsearch.search.SearchPhaseResult;
1416
import org.elasticsearch.search.SearchShardTarget;
1517
import org.elasticsearch.search.internal.ShardSearchContextId;
@@ -21,17 +23,29 @@ public final class QueryFetchSearchResult extends SearchPhaseResult {
2123

2224
private final QuerySearchResult queryResult;
2325
private final FetchSearchResult fetchResult;
26+
private final RefCounted refCounted;
2427

2528
public QueryFetchSearchResult(StreamInput in) throws IOException {
2629
super(in);
27-
// TODO: Delegate refcounting to QuerySearchResult (see https://github.com/elastic/elasticsearch/pull/94023)
30+
// These get a ref count of 1 when we create them, so we don't need to incRef here
2831
queryResult = new QuerySearchResult(in);
2932
fetchResult = new FetchSearchResult(in);
33+
refCounted = AbstractRefCounted.of(() -> {
34+
queryResult.decRef();
35+
fetchResult.decRef();
36+
});
3037
}
3138

3239
public QueryFetchSearchResult(QuerySearchResult queryResult, FetchSearchResult fetchResult) {
3340
this.queryResult = queryResult;
3441
this.fetchResult = fetchResult;
42+
// We're acquiring a copy, we should incRef it
43+
this.queryResult.incRef();
44+
this.fetchResult.incRef();
45+
refCounted = AbstractRefCounted.of(() -> {
46+
queryResult.decRef();
47+
fetchResult.decRef();
48+
});
3549
}
3650

3751
@Override
@@ -73,4 +87,24 @@ public void writeTo(StreamOutput out) throws IOException {
7387
queryResult.writeTo(out);
7488
fetchResult.writeTo(out);
7589
}
90+
91+
@Override
92+
public void incRef() {
93+
refCounted.incRef();
94+
}
95+
96+
@Override
97+
public boolean tryIncRef() {
98+
return refCounted.tryIncRef();
99+
}
100+
101+
@Override
102+
public boolean decRef() {
103+
return refCounted.decRef();
104+
}
105+
106+
@Override
107+
public boolean hasReferences() {
108+
return refCounted.hasReferences();
109+
}
76110
}

server/src/main/java/org/elasticsearch/search/fetch/ScrollQueryFetchSearchResult.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,24 @@ public void writeTo(StreamOutput out) throws IOException {
6363
getSearchShardTarget().writeTo(out);
6464
result.writeTo(out);
6565
}
66+
67+
@Override
68+
public void incRef() {
69+
result.incRef();
70+
}
71+
72+
@Override
73+
public boolean tryIncRef() {
74+
return result.tryIncRef();
75+
}
76+
77+
@Override
78+
public boolean decRef() {
79+
return result.decRef();
80+
}
81+
82+
@Override
83+
public boolean hasReferences() {
84+
return result.hasReferences();
85+
}
6686
}

0 commit comments

Comments
 (0)