-
Notifications
You must be signed in to change notification settings - Fork 25.6k
ES|QL: Wrap remote errors with cluster name to provide more context #123156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
b2fc21d
a049cc9
e32956a
cd3fefa
6daa9e1
bc166cc
db90b71
21cfe8a
a7bf704
e1e9592
8e9976d
6dd8421
8f2a39d
57a133a
eb27643
ea67731
a155983
24b103d
cb130f5
ca40d5c
fafa4ae
7dd834e
a5d14a4
99f9b45
2ac89c7
32e26f6
764933f
8ef049f
04149a4
4e0baa3
8a5d6a0
90b314a
0996521
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
pr: 123156 | ||
summary: Wrap remote errors with cluster name to provide more context | ||
area: Search | ||
type: enhancement | ||
issues: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.esql.action; | ||
|
||
import org.elasticsearch.compute.operator.exchange.ExchangeService; | ||
import org.elasticsearch.test.transport.MockTransportService; | ||
import org.elasticsearch.transport.TransportService; | ||
import org.elasticsearch.xpack.esql.RemoteComputeException; | ||
|
||
import static org.hamcrest.Matchers.is; | ||
|
||
public class EsqlRemoteErrorWrapIT extends AbstractCrossClusterTestCase { | ||
|
||
public void testThatRemoteErrorsAreWrapped() throws Exception { | ||
setupClusters(2); | ||
setSkipUnavailable(REMOTE_CLUSTER_1, false); | ||
setSkipUnavailable(REMOTE_CLUSTER_2, false); | ||
|
||
/* | ||
* Let's say something went wrong with the Exchange and its specifics when talking to a remote. | ||
* And let's pretend only cluster-a is affected. | ||
*/ | ||
for (var nodes : cluster(REMOTE_CLUSTER_1).getNodeNames()) { | ||
((MockTransportService) cluster(REMOTE_CLUSTER_1).getInstance(TransportService.class, nodes)).addRequestHandlingBehavior( | ||
ExchangeService.OPEN_EXCHANGE_ACTION_NAME, | ||
(requestHandler, transportRequest, transportChannel, transportTask) -> { | ||
throw new IllegalArgumentException("some error to wreck havoc"); | ||
} | ||
); | ||
} | ||
|
||
RemoteComputeException wrappedError = expectThrows( | ||
RemoteComputeException.class, | ||
() -> runQuery("FROM " + REMOTE_CLUSTER_1 + ":*," + REMOTE_CLUSTER_2 + ":* | LIMIT 100", false) | ||
); | ||
assertThat(wrappedError.getMessage(), is("Remote [cluster-a] encountered an error")); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.esql; | ||
|
||
import org.elasticsearch.ElasticsearchException; | ||
import org.elasticsearch.ExceptionsHelper; | ||
import org.elasticsearch.rest.RestStatus; | ||
|
||
import java.util.Objects; | ||
|
||
/** | ||
* Represents an error that occurred when starting compute on a remote node. | ||
* It allows capturing some context such as the cluster alias that encountered the error. | ||
*/ | ||
public class RemoteComputeException extends ElasticsearchException { | ||
|
||
/** | ||
* @param clusterAlias Name of the cluster. | ||
* @param cause Error that was encountered. | ||
*/ | ||
public RemoteComputeException(String clusterAlias, Throwable cause) { | ||
super("Remote [" + clusterAlias + "] encountered an error", cause); | ||
Objects.requireNonNull(cause); | ||
} | ||
|
||
@Override | ||
public RestStatus status() { | ||
// This is similar to what we do in SearchPhaseExecutionException. | ||
return ExceptionsHelper.status(getCause()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -40,6 +40,7 @@ | |||
import org.elasticsearch.transport.RemoteClusterAware; | ||||
import org.elasticsearch.transport.TransportRequest; | ||||
import org.elasticsearch.transport.TransportService; | ||||
import org.elasticsearch.xpack.esql.RemoteComputeException; | ||||
import org.elasticsearch.xpack.esql.action.EsqlExecutionInfo; | ||||
import org.elasticsearch.xpack.esql.action.EsqlQueryAction; | ||||
import org.elasticsearch.xpack.esql.core.expression.Attribute; | ||||
|
@@ -300,6 +301,7 @@ public void execute( | |||
cancelQueryOnFailure, | ||||
execInfo, | ||||
computeListener.acquireCompute() | ||||
.delegateResponse((l, ex) -> l.onFailure(new RemoteComputeException(cluster.clusterAlias(), ex))) | ||||
|
out.writeException(exception); |
Maybe add an async query with failures to verify this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cool, if it can be serialized then it needs to be registered as a serializable one, which makes me wonder if we can reuse an existing one instead to avoid that ceremony :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the catch! Yes, the exception is getting serialised for asynchronous queries and has to be handled accordingly.
wonder if we can reuse an existing one instead to avoid that ceremony
To re-use an existing exception, we primarily need to fulfil 2 requirements:
- It should propagate the status of the cause, and,
- It should not implement the ES wrapper interface to prevent unwrapping when the error is sent back to the user (which discards the context we've built up, i.e. the remote's name).
I don't see any exceptions that we can reuse.
Uh oh!
There was an error while loading. Please reload this page.