Skip to content

Commit 8a2f509

Browse files
authored
Always log data node failures (elastic#127420) (elastic#127511)
Log search exceptions as they occur on the data node no matter the value of error_trace. (cherry picked from commit 0538958) # Conflicts: # server/src/main/java/org/elasticsearch/search/SearchService.java # server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
1 parent 88c82f7 commit 8a2f509

File tree

5 files changed

+74
-167
lines changed

5 files changed

+74
-167
lines changed

qa/smoke-test-http/src/internalClusterTest/java/org/elasticsearch/http/SearchErrorTraceIT.java

Lines changed: 17 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -127,33 +127,7 @@ public void testSearchFailingQueryErrorTraceFalse() throws IOException {
127127
assertFalse(hasStackTrace.getAsBoolean());
128128
}
129129

130-
public void testDataNodeDoesNotLogStackTraceWhenErrorTraceTrue() throws IOException {
131-
setupIndexWithDocs();
132-
133-
Request searchRequest = new Request("POST", "/_search");
134-
searchRequest.setJsonEntity("""
135-
{
136-
"query": {
137-
"simple_query_string" : {
138-
"query": "foo",
139-
"fields": ["field"]
140-
}
141-
}
142-
}
143-
""");
144-
145-
String errorTriggeringIndex = "test2";
146-
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
147-
try (var mockLog = MockLog.capture(SearchService.class)) {
148-
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
149-
150-
searchRequest.addParameter("error_trace", "true");
151-
getRestClient().performRequest(searchRequest);
152-
mockLog.assertAllExpectationsMatched();
153-
}
154-
}
155-
156-
public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmpty() throws IOException {
130+
public void testDataNodeLogsStackTrace() throws IOException {
157131
setupIndexWithDocs();
158132

159133
Request searchRequest = new Request("POST", "/_search");
@@ -173,10 +147,14 @@ public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmpty() throws IOExce
173147
try (var mockLog = MockLog.capture(SearchService.class)) {
174148
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
175149

176-
// error_trace defaults to false so we can test both cases with some randomization
177-
if (randomBoolean()) {
150+
// No matter the value of error_trace (empty, true, or false) we should see stack traces logged
151+
int errorTraceValue = randomIntBetween(0, 2);
152+
if (errorTraceValue == 0) {
153+
searchRequest.addParameter("error_trace", "true");
154+
} else if (errorTraceValue == 1) {
178155
searchRequest.addParameter("error_trace", "false");
179-
}
156+
} // else empty
157+
180158
getRestClient().performRequest(searchRequest);
181159
mockLog.assertAllExpectationsMatched();
182160
}
@@ -233,32 +211,7 @@ public void testMultiSearchFailingQueryErrorTraceFalse() throws IOException {
233211
assertFalse(hasStackTrace.getAsBoolean());
234212
}
235213

236-
public void testDataNodeDoesNotLogStackTraceWhenErrorTraceTrueMultiSearch() throws IOException {
237-
setupIndexWithDocs();
238-
239-
XContentType contentType = XContentType.JSON;
240-
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
241-
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
242-
);
243-
Request searchRequest = new Request("POST", "/_msearch");
244-
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
245-
searchRequest.setEntity(
246-
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
247-
);
248-
249-
searchRequest.addParameter("error_trace", "true");
250-
251-
String errorTriggeringIndex = "test2";
252-
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
253-
try (var mockLog = MockLog.capture(SearchService.class)) {
254-
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
255-
256-
getRestClient().performRequest(searchRequest);
257-
mockLog.assertAllExpectationsMatched();
258-
}
259-
}
260-
261-
public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmptyMultiSearch() throws IOException {
214+
public void testDataNodeLogsStackTraceMultiSearch() throws IOException {
262215
setupIndexWithDocs();
263216

264217
XContentType contentType = XContentType.JSON;
@@ -271,16 +224,19 @@ public void testDataNodeLogsStackTraceWhenErrorTraceFalseOrEmptyMultiSearch() th
271224
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
272225
);
273226

274-
// error_trace defaults to false so we can test both cases with some randomization
275-
if (randomBoolean()) {
276-
searchRequest.addParameter("error_trace", "false");
277-
}
278-
279227
String errorTriggeringIndex = "test2";
280228
int numShards = getNumShards(errorTriggeringIndex).numPrimaries;
281229
try (var mockLog = MockLog.capture(SearchService.class)) {
282230
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
283231

232+
// No matter the value of error_trace (empty, true, or false) we should see stack traces logged
233+
int errorTraceValue = randomIntBetween(0, 2);
234+
if (errorTraceValue == 0) {
235+
searchRequest.addParameter("error_trace", "true");
236+
} else if (errorTraceValue == 1) {
237+
searchRequest.addParameter("error_trace", "false");
238+
} // else empty
239+
284240
getRestClient().performRequest(searchRequest);
285241
mockLog.assertAllExpectationsMatched();
286242
}

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -554,10 +554,11 @@ protected void doClose() {
554554
}
555555

556556
/**
557-
* Wraps the listener to avoid sending StackTraces back to the coordinating
558-
* node if the `error_trace` header is set to {@code false}. Upon reading we
559-
* default to {@code true} to maintain the same behavior as before the change,
560-
* due to older nodes not being able to specify whether it needs stack traces.
557+
* Wraps the listener to ensure errors are logged and to avoid sending
558+
* StackTraces back to the coordinating node if the `error_trace` header is
559+
* set to {@code false}. Upon reading, we default to {@code true} to maintain
560+
* the same behavior as before the change, due to older nodes not being able
561+
* to specify whether they need stack traces.
561562
*
562563
* @param <T> the type of the response
563564
* @param listener the action listener to be wrapped
@@ -568,44 +569,47 @@ protected void doClose() {
568569
* @param threadPool with context where to write the new header
569570
* @return the wrapped action listener
570571
*/
571-
static <T> ActionListener<T> maybeWrapListenerForStackTrace(
572+
static <T> ActionListener<T> wrapListenerForErrorHandling(
572573
ActionListener<T> listener,
573574
TransportVersion version,
574575
String nodeId,
575576
ShardId shardId,
576577
long taskId,
577578
ThreadPool threadPool
578579
) {
579-
boolean header = true;
580+
final boolean header;
580581
if (version.onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
581582
header = Boolean.parseBoolean(threadPool.getThreadContext().getHeaderOrDefault("error_trace", "false"));
582-
}
583-
if (header == false) {
584-
return listener.delegateResponse((l, e) -> {
585-
org.apache.logging.log4j.util.Supplier<String> messageSupplier = () -> format(
586-
"[%s]%s: failed to execute search request for task [%d]",
587-
nodeId,
588-
shardId,
589-
taskId
590-
);
591-
// Keep this logic aligned with that of SUPPRESSED_ERROR_LOGGER in RestResponse
592-
if (ExceptionsHelper.status(e).getStatus() < 500 || ExceptionsHelper.isNodeOrShardUnavailableTypeException(e)) {
593-
logger.debug(messageSupplier, e);
594-
} else {
595-
logger.warn(messageSupplier, e);
596-
}
583+
} else {
584+
header = true;
585+
}
586+
return listener.delegateResponse((l, e) -> {
587+
org.apache.logging.log4j.util.Supplier<String> messageSupplier = () -> format(
588+
"[%s]%s: failed to execute search request for task [%d]",
589+
nodeId,
590+
shardId,
591+
taskId
592+
);
593+
// Keep this logic aligned with that of SUPPRESSED_ERROR_LOGGER in RestResponse
594+
if (ExceptionsHelper.status(e).getStatus() < 500 || ExceptionsHelper.isNodeOrShardUnavailableTypeException(e)) {
595+
logger.debug(messageSupplier, e);
596+
} else {
597+
logger.warn(messageSupplier, e);
598+
}
599+
600+
if (header == false) {
597601
ExceptionsHelper.unwrapCausesAndSuppressed(e, err -> {
598602
err.setStackTrace(EMPTY_STACK_TRACE_ARRAY);
599603
return false;
600604
});
601-
l.onFailure(e);
602-
});
603-
}
604-
return listener;
605+
}
606+
607+
l.onFailure(e);
608+
});
605609
}
606610

607611
public void executeDfsPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) {
608-
listener = maybeWrapListenerForStackTrace(
612+
listener = wrapListenerForErrorHandling(
609613
listener,
610614
request.getChannelVersion(),
611615
clusterService.localNode().getId(),
@@ -650,7 +654,7 @@ private void loadOrExecuteQueryPhase(final ShardSearchRequest request, final Sea
650654
}
651655

652656
public void executeQueryPhase(ShardSearchRequest request, CancellableTask task, ActionListener<SearchPhaseResult> listener) {
653-
ActionListener<SearchPhaseResult> finalListener = maybeWrapListenerForStackTrace(
657+
ActionListener<SearchPhaseResult> finalListener = wrapListenerForErrorHandling(
654658
listener,
655659
request.getChannelVersion(),
656660
clusterService.localNode().getId(),
@@ -860,7 +864,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, Cancella
860864
public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShardTask task, ActionListener<RankFeatureResult> listener) {
861865
final ReaderContext readerContext = findReaderContext(request.contextId(), request);
862866
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest());
863-
listener = maybeWrapListenerForStackTrace(
867+
listener = wrapListenerForErrorHandling(
864868
listener,
865869
shardSearchRequest.getChannelVersion(),
866870
clusterService.localNode().getId(),
@@ -917,7 +921,7 @@ public void executeQueryPhase(
917921
TransportVersion version
918922
) {
919923
final LegacyReaderContext readerContext = (LegacyReaderContext) findReaderContext(request.contextId(), request);
920-
listener = maybeWrapListenerForStackTrace(
924+
listener = wrapListenerForErrorHandling(
921925
listener,
922926
version,
923927
clusterService.localNode().getId(),
@@ -974,7 +978,7 @@ public void executeQueryPhase(
974978
) {
975979
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest());
976980
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest());
977-
listener = maybeWrapListenerForStackTrace(
981+
listener = wrapListenerForErrorHandling(
978982
listener,
979983
version,
980984
clusterService.localNode().getId(),

server/src/test/java/org/elasticsearch/search/SearchServiceTests.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
import java.util.function.Predicate;
6464

6565
import static org.elasticsearch.common.Strings.format;
66-
import static org.elasticsearch.search.SearchService.maybeWrapListenerForStackTrace;
66+
import static org.elasticsearch.search.SearchService.wrapListenerForErrorHandling;
6767
import static org.hamcrest.CoreMatchers.is;
6868
import static org.hamcrest.Matchers.not;
6969

@@ -128,7 +128,7 @@ public Type getType() {
128128
doTestCanMatch(searchRequest, sortField, true, null, false);
129129
}
130130

131-
public void testMaybeWrapListenerForStackTrace() {
131+
public void testWrapListenerForErrorHandling() {
132132
ShardId shardId = new ShardId("index", "index", 0);
133133
// Tests that the same listener has stack trace if is not wrapped or does not have stack trace if it is wrapped.
134134
AtomicBoolean isWrapped = new AtomicBoolean(false);
@@ -151,12 +151,12 @@ public void onFailure(Exception e) {
151151
e.fillInStackTrace();
152152
assertThat(e.getStackTrace().length, is(not(0)));
153153
listener.onFailure(e);
154-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), "node", shardId, 123L, threadPool);
154+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), "node", shardId, 123L, threadPool);
155155
isWrapped.set(true);
156156
listener.onFailure(e);
157157
}
158158

159-
public void testMaybeWrapListenerForStackTraceDebugLog() {
159+
public void testWrapListenerForErrorHandlingDebugLog() {
160160
final String nodeId = "node";
161161
final String index = "index";
162162
ShardId shardId = new ShardId(index, index, 0);
@@ -189,12 +189,12 @@ public void onFailure(Exception e) {
189189
}
190190
};
191191
IllegalArgumentException e = new IllegalArgumentException(exceptionMessage); // 400-level exception
192-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
192+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
193193
listener.onFailure(e);
194194
}
195195
}
196196

197-
public void testMaybeWrapListenerForStackTraceWarnLog() {
197+
public void testWrapListenerForErrorHandlingWarnLog() {
198198
final String nodeId = "node";
199199
final String index = "index";
200200
ShardId shardId = new ShardId(index, index, 0);
@@ -226,7 +226,7 @@ public void onFailure(Exception e) {
226226
}
227227
};
228228
IllegalStateException e = new IllegalStateException(exceptionMessage); // 500-level exception
229-
listener = maybeWrapListenerForStackTrace(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
229+
listener = wrapListenerForErrorHandling(listener, TransportVersion.current(), nodeId, shardId, taskId, threadPool);
230230
listener.onFailure(e);
231231
}
232232
}

test/framework/src/main/java/org/elasticsearch/search/ErrorTraceHelper.java

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import java.util.stream.Collectors;
2727

2828
import static org.elasticsearch.common.Strings.format;
29-
import static org.elasticsearch.test.ESIntegTestCase.getNodeId;
3029
import static org.elasticsearch.test.ESIntegTestCase.internalCluster;
3130
import static org.elasticsearch.test.ESTestCase.asInstanceOf;
3231

@@ -90,32 +89,4 @@ public static void addSeenLoggingExpectations(int numShards, MockLog mockLog, St
9089
);
9190
}
9291
}
93-
94-
/**
95-
* Adds expectations for the _absence_ of debug logging of a message. An unseen expectation is added for each
96-
* combination of node in the internal cluster and shard in the index.
97-
*
98-
* @param numShards the number of shards in the index (an expectation will be added for each shard)
99-
* @param mockLog the mock log
100-
* @param errorTriggeringIndex the name of the index that will trigger the error
101-
*/
102-
public static void addUnseenLoggingExpectations(int numShards, MockLog mockLog, String errorTriggeringIndex) {
103-
for (String nodeName : internalCluster().getNodeNames()) {
104-
for (int shard = 0; shard < numShards; shard++) {
105-
mockLog.addExpectation(
106-
new MockLog.UnseenEventExpectation(
107-
format(
108-
"\"[%s][%s][%d]: failed to execute search request\" and an exception logged",
109-
getNodeId(nodeName),
110-
errorTriggeringIndex,
111-
shard
112-
),
113-
SearchService.class.getCanonicalName(),
114-
Level.DEBUG,
115-
format("[%s][%s][%d]: failed to execute search request", getNodeId(nodeName), errorTriggeringIndex, shard)
116-
)
117-
);
118-
}
119-
}
120-
}
12192
}

0 commit comments

Comments
 (0)