Skip to content

Commit 52afa62

Browse files
Pass only ElasticsearchTimeoutException through the response object
and surface all other errors as top-level errors
1 parent fe975f7 commit 52afa62

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

server/src/internalClusterTest/java/org/elasticsearch/indices/cluster/RemoteFieldCapsForceConnectTimeoutIT.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ public void testTimeoutSetting() {
7575
MockTransportService mts = (MockTransportService) cluster(LOCAL_CLUSTER).getInstance(TransportService.class, nodeName);
7676

7777
mts.addConnectBehavior(
78-
cluster(REMOTE_CLUSTER_1).getInstance(TransportService.class, cluster(REMOTE_CLUSTER_1).getNodeNames()[0]),
78+
cluster(REMOTE_CLUSTER_1).getInstance(TransportService.class, (String) null),
7979
((transport, discoveryNode, profile, listener) -> {
8080
try {
8181
latch.await();
@@ -103,7 +103,13 @@ public void testTimeoutSetting() {
103103
throw new AssertionError(e);
104104
} finally {
105105
var fieldCapsRequest = new FieldCapabilitiesRequest();
106-
fieldCapsRequest.indices("*", "*:*");
106+
/*
107+
* We have a local and 2 remote clusters but will target only the remote that we stalled.
108+
* This is because when the timeout kicks in, and we move on from the stalled remote, we do not want
109+
* the error to be a top-level error. Rather, it must be present in the response object under "failures".
110+
* All other errors are free to be top-level errors though.
111+
*/
112+
fieldCapsRequest.indices(REMOTE_CLUSTER_1 + ":*");
107113
fieldCapsRequest.fields("foo", "bar", "baz");
108114
var result = safeGet(client().execute(TransportFieldCapabilitiesAction.TYPE, fieldCapsRequest));
109115

server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
import org.apache.lucene.util.ArrayUtil;
1313
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
14+
import org.elasticsearch.ElasticsearchTimeoutException;
1415
import org.elasticsearch.ExceptionsHelper;
1516
import org.elasticsearch.action.ActionListener;
1617
import org.elasticsearch.action.ActionListenerResponseHandler;
@@ -398,7 +399,21 @@ private static void mergeIndexResponses(
398399
listener.onResponse(new FieldCapabilitiesResponse(new ArrayList<>(indexResponses.values()), failures));
399400
}
400401
} else {
401-
listener.onResponse(new FieldCapabilitiesResponse(Collections.emptyList(), failures));
402+
// we have no responses at all, maybe because of errors
403+
if (indexFailures.isEmpty() == false) {
404+
/*
405+
* Under no circumstances are we to pass timeout errors originating from SubscribableListener as top-level errors.
406+
* Instead, they should always be passed through the response object, as part of "failures".
407+
*/
408+
if (failures.stream().anyMatch(failure -> failure.getException() instanceof ElasticsearchTimeoutException)) {
409+
listener.onResponse(new FieldCapabilitiesResponse(Collections.emptyList(), failures));
410+
} else {
411+
// none of the failures is a timeout: report the first exception to the listener as a top-level error
412+
listener.onFailure(failures.get(0).getException());
413+
}
414+
} else {
415+
listener.onResponse(new FieldCapabilitiesResponse(Collections.emptyList(), Collections.emptyList()));
416+
}
402417
}
403418
}
404419

0 commit comments

Comments
 (0)