Remove early phase failure in batched #136889
Changes from 4 commits
New changelog entry (new file, @@ -0,0 +1,6 @@):

```diff
+pr: 136889
+summary: Remove early phase failure in batched
+area: Search
+type: bug
+issues:
+ - 134151
```
SearchQueryThenFetchAsyncAction.java. The two removed imports correspond to the exception special-casing deleted from handleException further down:

```diff
@@ -48,13 +48,11 @@
 import org.elasticsearch.search.query.QuerySearchResult;
 import org.elasticsearch.tasks.CancellableTask;
 import org.elasticsearch.tasks.Task;
-import org.elasticsearch.tasks.TaskCancelledException;
 import org.elasticsearch.tasks.TaskId;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.AbstractTransportRequest;
 import org.elasticsearch.transport.BytesTransportResponse;
 import org.elasticsearch.transport.LeakTracker;
-import org.elasticsearch.transport.SendRequestTransportException;
 import org.elasticsearch.transport.Transport;
 import org.elasticsearch.transport.TransportActionProxy;
 import org.elasticsearch.transport.TransportChannel;
```
```diff
@@ -84,6 +82,9 @@ public class SearchQueryThenFetchAsyncAction extends AbstractSearchAsyncAction<S
     private static final Logger logger = LogManager.getLogger(SearchQueryThenFetchAsyncAction.class);

     private static final TransportVersion BATCHED_QUERY_PHASE_VERSION = TransportVersion.fromName("batched_query_phase_version");
+    private static final TransportVersion BATCHED_RESPONSE_MIGHT_INCLUDE_REDUCTION_FAILURE = TransportVersion.fromName(
+        "batched_response_might_include_reduction_failure"
+    );

     private final SearchProgressListener progressListener;
```
```diff
@@ -225,20 +226,32 @@ public static final class NodeQueryResponse extends TransportResponse {
         private final RefCounted refCounted = LeakTracker.wrap(new SimpleRefCounted());

         private final Object[] results;
+        private final Exception reductionFailure;
         private final SearchPhaseController.TopDocsStats topDocsStats;
         private final QueryPhaseResultConsumer.MergeResult mergeResult;

         public NodeQueryResponse(StreamInput in) throws IOException {
             this.results = in.readArray(i -> i.readBoolean() ? new QuerySearchResult(i) : i.readException(), Object[]::new);
-            this.mergeResult = QueryPhaseResultConsumer.MergeResult.readFrom(in);
-            this.topDocsStats = SearchPhaseController.TopDocsStats.readFrom(in);
+            if (in.getTransportVersion().supports(BATCHED_RESPONSE_MIGHT_INCLUDE_REDUCTION_FAILURE) && in.readBoolean()) {
+                this.reductionFailure = in.readException();
+                this.mergeResult = null;
+                this.topDocsStats = null;
+            } else {
+                this.reductionFailure = null;
+                this.mergeResult = QueryPhaseResultConsumer.MergeResult.readFrom(in);
+                this.topDocsStats = SearchPhaseController.TopDocsStats.readFrom(in);
+            }
         }

         // public for tests
         public Object[] getResults() {
             return results;
         }

+        Exception getReductionFailure() {
+            return reductionFailure;
+        }
+
         @Override
         public void writeTo(StreamOutput out) throws IOException {
             out.writeVInt(results.length);
```

```diff
@@ -249,7 +262,17 @@ public void writeTo(StreamOutput out) throws IOException {
                 writePerShardResult(out, (QuerySearchResult) result);
             }
         }
-        writeMergeResult(out, mergeResult, topDocsStats);
+        if (out.getTransportVersion().supports(BATCHED_RESPONSE_MIGHT_INCLUDE_REDUCTION_FAILURE)) {
+            boolean hasReductionFailure = reductionFailure != null;
+            out.writeBoolean(hasReductionFailure);
+            if (hasReductionFailure) {
+                out.writeException(reductionFailure);
+            } else {
+                writeMergeResult(out, mergeResult, topDocsStats);
+            }
+        } else {
+            writeMergeResult(out, mergeResult, topDocsStats);
+        }
     }

     @Override
```
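Taken together, the constructor and writeTo above define a version-gated frame: on new-enough connections a leading boolean says whether the payload is a reduction failure or a merge result, while old connections never see the flag at all. Below is a minimal, self-contained sketch of that framing in plain java.io; the `Frame` class, the `String` payloads, and the numeric version check are illustrative stand-ins, not the Elasticsearch stream APIs:

```java
import java.io.*;

// Toy model of NodeQueryResponse's version-gated framing (illustrative only).
final class Frame {
    // Stand-in for supports(BATCHED_RESPONSE_MIGHT_INCLUDE_REDUCTION_FAILURE);
    // the id mirrors the csv registration at the end of this PR.
    static final int SUPPORTS_REDUCTION_FAILURE = 9_198_000;

    static void write(DataOutputStream out, int peerVersion, String failure, String mergeResult) throws IOException {
        if (peerVersion >= SUPPORTS_REDUCTION_FAILURE) {
            boolean hasFailure = failure != null;
            out.writeBoolean(hasFailure);          // discriminator, new in this PR
            if (hasFailure) {
                out.writeUTF(failure);             // a reduction failure replaces the merge result
            } else {
                out.writeUTF(mergeResult);
            }
        } else {
            out.writeUTF(mergeResult);             // old peers can only accept a merge result
        }
    }

    static String read(DataInputStream in, int peerVersion) throws IOException {
        // Mirrors the reader above: only consult the flag when the peer could have written it.
        if (peerVersion >= SUPPORTS_REDUCTION_FAILURE && in.readBoolean()) {
            return "reduction failure: " + in.readUTF();
        }
        return "merge result: " + in.readUTF();
    }
}
```

Because the flag is written only when both sides support the new transport version, a mixed-version cluster keeps the old one-payload wire shape and never misreads the discriminator byte.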
```diff
@@ -502,7 +525,12 @@ public Executor executor() {
         @Override
         public void handleResponse(NodeQueryResponse response) {
             if (results instanceof QueryPhaseResultConsumer queryPhaseResultConsumer) {
-                queryPhaseResultConsumer.addBatchedPartialResult(response.topDocsStats, response.mergeResult);
+                Exception reductionFailure = response.getReductionFailure();
+                if (reductionFailure != null) {
+                    queryPhaseResultConsumer.failure.compareAndSet(null, reductionFailure);
+                } else {
+                    queryPhaseResultConsumer.addBatchedPartialResult(response.topDocsStats, response.mergeResult);
+                }
             }
             for (int i = 0; i < response.results.length; i++) {
                 var s = request.shards.get(i);
```
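On the coordinator, a reduction failure from one node must not clobber one that arrived earlier from another node, and batched responses can arrive concurrently. The `compareAndSet(null, ...)` call gives first-failure-wins semantics without locking, and the per-shard results carried in the same response are still consumed afterwards. A minimal sketch of the idiom; the `failure` field name matches the diff, the surrounding class is hypothetical:

```java
import java.util.concurrent.atomic.AtomicReference;

// First-failure-wins capture, as used for queryPhaseResultConsumer.failure above.
final class FailureCollector {
    private final AtomicReference<Exception> failure = new AtomicReference<>();

    void onReductionFailure(Exception e) {
        // Atomically stores e only if no failure has been recorded yet, so
        // concurrent node responses cannot overwrite the first recorded failure.
        failure.compareAndSet(null, e);
    }

    Exception firstFailure() {
        return failure.get();
    }
}
```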
```diff
@@ -526,21 +554,7 @@ public void handleResponse(NodeQueryResponse response) {
                 public void handleException(TransportException e) {
                     Exception cause = (Exception) ExceptionsHelper.unwrapCause(e);
                     logger.debug("handling node search exception coming from [" + nodeId + "]", cause);
-                    if (e instanceof SendRequestTransportException || cause instanceof TaskCancelledException) {
-                        // two possible special cases here where we do not want to fail the phase:
-                        // failure to send out the request -> handle things the same way a shard would fail with unbatched execution
-                        // as this could be a transient failure and partial results we may have are still valid
-                        // cancellation of the whole batched request on the remote -> maybe we timed out or so, partial results may
-                        // still be valid
-                        onNodeQueryFailure(e, request, routing);
-                    } else {
-                        // Remote failure that wasn't due to networking or cancellation means that the data node was unable to reduce
-                        // its local results. Failure to reduce always fails the phase without exception so we fail the phase here.
-                        if (results instanceof QueryPhaseResultConsumer queryPhaseResultConsumer) {
-                            queryPhaseResultConsumer.failure.compareAndSet(null, cause);
-                        }
-                        onPhaseFailure(getName(), "", cause);
-                    }
+                    onNodeQueryFailure(e, request, routing);
                 }
             });
         });
```
```diff
@@ -801,68 +815,80 @@ void onShardDone() {
             if (countDown.countDown() == false) {
                 return;
             }
-            RecyclerBytesStreamOutput out = null;
-            boolean success = false;
             var channelListener = new ChannelActionListener<>(channel);
+            RecyclerBytesStreamOutput out = dependencies.transportService.newNetworkBytesStream();
+            out.setTransportVersion(channel.getVersion());
             try (queryPhaseResultConsumer) {
-                var failure = queryPhaseResultConsumer.failure.get();
-                if (failure != null) {
-                    handleMergeFailure(failure, channelListener, namedWriteableRegistry);
-                    return;
-                }
-                final QueryPhaseResultConsumer.MergeResult mergeResult;
-                try {
-                    mergeResult = Objects.requireNonNullElse(
-                        queryPhaseResultConsumer.consumePartialMergeResultDataNode(),
-                        EMPTY_PARTIAL_MERGE_RESULT
-                    );
-                } catch (Exception e) {
-                    handleMergeFailure(e, channelListener, namedWriteableRegistry);
-                    return;
-                }
-                // translate shard indices to those on the coordinator so that it can interpret the merge result without adjustments,
-                // also collect the set of indices that may be part of a subsequent fetch operation here so that we can release all other
-                // indices without a roundtrip to the coordinating node
-                final BitSet relevantShardIndices = new BitSet(searchRequest.shards.size());
-                if (mergeResult.reducedTopDocs() != null) {
-                    for (ScoreDoc scoreDoc : mergeResult.reducedTopDocs().scoreDocs) {
-                        final int localIndex = scoreDoc.shardIndex;
-                        scoreDoc.shardIndex = searchRequest.shards.get(localIndex).shardIndex;
-                        relevantShardIndices.set(localIndex);
-                    }
-                }
-                final int resultCount = queryPhaseResultConsumer.getNumShards();
-                out = dependencies.transportService.newNetworkBytesStream();
-                out.setTransportVersion(channel.getVersion());
-                try {
-                    out.writeVInt(resultCount);
-                    for (int i = 0; i < resultCount; i++) {
-                        var result = queryPhaseResultConsumer.results.get(i);
-                        if (result == null) {
-                            NodeQueryResponse.writePerShardException(out, failures.remove(i));
-                        } else {
-                            // free context id and remove it from the result right away in case we don't need it anymore
-                            maybeFreeContext(result, relevantShardIndices, namedWriteableRegistry);
-                            NodeQueryResponse.writePerShardResult(out, result);
-                        }
-                    }
-                    NodeQueryResponse.writeMergeResult(out, mergeResult, queryPhaseResultConsumer.topDocsStats);
-                    success = true;
-                } catch (IOException e) {
-                    handleMergeFailure(e, channelListener, namedWriteableRegistry);
-                    return;
-                }
-            } finally {
-                if (success == false && out != null) {
-                    out.close();
-                }
+                Exception reductionFailure = queryPhaseResultConsumer.failure.get();
+                if (reductionFailure == null) {
+                    writeSuccessfulResponse(out);
+                } else {
+                    writeReductionFailureResponse(out, reductionFailure);
+                }
+            } catch (IOException e) {
+                releaseAllResultsContexts();
+                channelListener.onFailure(e);
             }
             ActionListener.respondAndRelease(
                 channelListener,
                 new BytesTransportResponse(out.moveToBytesReference(), out.getTransportVersion())
             );
         }
+
+        private void writeSuccessfulResponse(RecyclerBytesStreamOutput out) throws IOException {
```
Review thread on writeSuccessfulResponse:

Member: Is it necessary to refactor this serialization code? Moving it around makes it more difficult to eyeball somehow.

Member: Disregard my previous comment. I understand why you did things the way you did; it's fine as-is. As for the review, I simply trust that the successful-response writing is a plain copy of the previous code we had, with no changes.

Contributor (author): That's right, same as the previous code except for the additional
```diff
+            final QueryPhaseResultConsumer.MergeResult mergeResult;
+            try {
+                mergeResult = Objects.requireNonNullElse(
+                    queryPhaseResultConsumer.consumePartialMergeResultDataNode(),
+                    EMPTY_PARTIAL_MERGE_RESULT
+                );
+            } catch (Exception e) {
+                writeReductionFailureResponse(out, e);
+                return;
+            }
+            // translate shard indices to those on the coordinator so that it can interpret the merge result without adjustments,
+            // also collect the set of indices that may be part of a subsequent fetch operation here so that we can release all other
+            // indices without a roundtrip to the coordinating node
+            final BitSet relevantShardIndices = new BitSet(searchRequest.shards.size());
+            if (mergeResult.reducedTopDocs() != null) {
+                for (ScoreDoc scoreDoc : mergeResult.reducedTopDocs().scoreDocs) {
+                    final int localIndex = scoreDoc.shardIndex;
+                    scoreDoc.shardIndex = searchRequest.shards.get(localIndex).shardIndex;
+                    relevantShardIndices.set(localIndex);
+                }
+            }
+            final int resultCount = queryPhaseResultConsumer.getNumShards();
+            out.writeVInt(resultCount);
+            for (int i = 0; i < resultCount; i++) {
+                var result = queryPhaseResultConsumer.results.get(i);
+                if (result == null) {
+                    NodeQueryResponse.writePerShardException(out, failures.remove(i));
+                } else {
+                    // free context id and remove it from the result right away in case we don't need it anymore
+                    maybeFreeContext(result, relevantShardIndices, namedWriteableRegistry);
+                    NodeQueryResponse.writePerShardResult(out, result);
+                }
+            }
+            out.writeBoolean(false); // does not have a reduction failure
+            NodeQueryResponse.writeMergeResult(out, mergeResult, queryPhaseResultConsumer.topDocsStats);
+        }
+
+        private void writeReductionFailureResponse(RecyclerBytesStreamOutput out, Exception reductionFailure) throws IOException {
```
Review thread on writeReductionFailureResponse:

Member: Maybe add a comment about where the corresponding read code for this can be found? Future readers may look for it and not find it easily, since we write to an opaque bytes transport response.

Contributor (author): Good idea, done.
```diff
+            final int resultCount = queryPhaseResultConsumer.getNumShards();
+            out.writeVInt(resultCount);
+            for (int i = 0; i < resultCount; i++) {
+                var result = queryPhaseResultConsumer.results.get(i);
+                if (result == null) {
+                    NodeQueryResponse.writePerShardException(out, failures.remove(i));
+                } else {
+                    NodeQueryResponse.writePerShardResult(out, result);
+                }
+            }
+            out.writeBoolean(true); // does have a reduction failure
+            out.writeException(reductionFailure);
+            releaseAllResultsContexts();
```
Review thread on the releaseAllResultsContexts() call:

Member: Should this be in a finally block?

Contributor (author): In the case of an IOException, the caller releases the contexts in its catch block, so I think this is alright; otherwise we would be releasing twice.
```diff
+        }

         private void maybeFreeContext(
             SearchPhaseResult result,
             BitSet relevantShardIndices,
```

```diff
@@ -881,11 +907,7 @@ && isPartOfPIT(searchRequest.searchRequest, q.getContextId(), namedWriteableRegi
             }
         }

-        private void handleMergeFailure(
-            Exception e,
-            ChannelActionListener<TransportResponse> channelListener,
-            NamedWriteableRegistry namedWriteableRegistry
-        ) {
+        private void releaseAllResultsContexts() {
             queryPhaseResultConsumer.getSuccessfulResults()
                 .forEach(
                     searchPhaseResult -> releaseLocalContext(
```

```diff
@@ -895,7 +917,6 @@ private void handleMergeFailure(
                         namedWriteableRegistry
                     )
                 );
-            channelListener.onFailure(e);
         }

         void consumeResult(QuerySearchResult queryResult) {
```
New transport version definition file, pinning the id for the new name (@@ -0,0 +1 @@):

```diff
+9198000
```
Transport version upper-bound registration, advancing the latest entry (@@ -1 +1 @@):

```diff
-dimension_values,9197000
+batched_response_might_include_reduction_failure,9198000
```
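These two csv files are how the named constant in the Java code resolves to a concrete wire id: the single-line file pins `batched_response_might_include_reduction_failure` to 9198000, and what appears to be the per-branch upper-bound file moves its latest entry forward from `dimension_values,9197000`. Below is a toy sketch of the name-to-id lookup and the `supports` check this enables; the map contents, the id shown for `batched_query_phase_version`, and the class itself are illustrative assumptions, not the real generated resources or the actual `TransportVersion` implementation:

```java
import java.util.Map;

// Toy model of named-transport-version resolution backed by files like the two above.
final class ToyTransportVersions {
    private static final Map<String, Integer> DEFINITIONS = Map.of(
        "batched_query_phase_version", 9_112_000, // hypothetical id, for illustration only
        "batched_response_might_include_reduction_failure", 9_198_000
    );

    static int fromName(String name) {
        Integer id = DEFINITIONS.get(name);
        if (id == null) {
            throw new IllegalArgumentException("unknown transport version: " + name);
        }
        return id;
    }

    // A connection "supports" a named feature when its negotiated id is at least the feature's id.
    static boolean supports(int negotiatedId, String name) {
        return negotiatedId >= fromName(name);
    }

    public static void main(String[] args) {
        System.out.println(supports(9_198_000, "batched_response_might_include_reduction_failure")); // true
        System.out.println(supports(9_197_000, "batched_response_might_include_reduction_failure")); // false
    }
}
```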