Skip to content

Commit fdaba56

Browse files
committed
Debug log stack trace before it is cleared
1 parent 930b4ab commit fdaba56

File tree

9 files changed

+497
-33
lines changed

9 files changed

+497
-33
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ expensive messages that will usually be discarded:
467467

468468
Logging is an important behaviour of the system and sometimes deserves its own
469469
unit tests, especially if there is complex logic for computing what is logged
470-
and when to log it. You can use a `org.elasticsearch.test.MockLogAppender` to
470+
and when to log it. You can use a `org.elasticsearch.test.MockLog` to
471471
make assertions about the logs that are being emitted.
472472

473473
Logging is a powerful diagnostic technique, but it is not the only possibility.

qa/smoke-test-http/src/internalClusterTest/java/org/elasticsearch/http/SearchErrorTraceIT.java

Lines changed: 171 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,25 @@
1111

1212
import org.apache.http.entity.ContentType;
1313
import org.apache.http.nio.entity.NByteArrayEntity;
14+
import org.apache.logging.log4j.Level;
15+
import org.apache.logging.log4j.LogManager;
16+
import org.apache.logging.log4j.core.config.Configurator;
1417
import org.elasticsearch.action.search.MultiSearchRequest;
1518
import org.elasticsearch.action.search.SearchRequest;
1619
import org.elasticsearch.client.Request;
20+
import org.elasticsearch.cluster.metadata.IndexMetadata;
21+
import org.elasticsearch.common.settings.Settings;
1722
import org.elasticsearch.common.util.CollectionUtils;
1823
import org.elasticsearch.plugins.Plugin;
1924
import org.elasticsearch.search.ErrorTraceHelper;
25+
import org.elasticsearch.search.SearchService;
2026
import org.elasticsearch.search.builder.SearchSourceBuilder;
27+
import org.elasticsearch.test.MockLog;
2128
import org.elasticsearch.test.transport.MockTransportService;
2229
import org.elasticsearch.xcontent.XContentType;
30+
import org.junit.AfterClass;
2331
import org.junit.Before;
32+
import org.junit.BeforeClass;
2433

2534
import java.io.IOException;
2635
import java.nio.charset.Charset;
@@ -32,24 +41,41 @@
3241
public class SearchErrorTraceIT extends HttpSmokeTestCase {
3342
private BooleanSupplier hasStackTrace;
3443

44+
private static final String loggerName = "org.elasticsearch.search.SearchService";
45+
private static Level originalLogLevel;
46+
3547
@Override
3648
protected Collection<Class<? extends Plugin>> nodePlugins() {
3749
return CollectionUtils.appendToCopyNoNullElements(super.nodePlugins(), MockTransportService.TestPlugin.class);
3850
}
3951

52+
@BeforeClass
53+
public static void setDebugLogLevel() {
54+
originalLogLevel = LogManager.getLogger(loggerName).getLevel();
55+
Configurator.setLevel(loggerName, Level.DEBUG);
56+
}
57+
58+
@AfterClass
59+
public static void resetLogLevel() {
60+
Configurator.setLevel(loggerName, originalLogLevel);
61+
}
62+
4063
@Before
4164
public void setupMessageListener() {
4265
hasStackTrace = ErrorTraceHelper.setupErrorTraceListener(internalCluster());
4366
}
4467

45-
private void setupIndexWithDocs() {
46-
createIndex("test1", "test2");
68+
private int setupIndexWithDocs() {
69+
int numShards = between(DEFAULT_MIN_NUM_SHARDS, DEFAULT_MAX_NUM_SHARDS);
70+
createIndex("test1", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build());
71+
createIndex("test2", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build());
4772
indexRandom(
4873
true,
4974
prepareIndex("test1").setId("1").setSource("field", "foo"),
5075
prepareIndex("test2").setId("10").setSource("field", 5)
5176
);
5277
refresh();
78+
return numShards;
5379
}
5480

5581
public void testSearchFailingQueryErrorTraceDefault() throws IOException {
@@ -108,6 +134,80 @@ public void testSearchFailingQueryErrorTraceFalse() throws IOException {
108134
assertFalse(hasStackTrace.getAsBoolean());
109135
}
110136

137+
public void testLoggingInSearchFailingQueryErrorTraceDefault() throws IOException {
138+
int numShards = setupIndexWithDocs();
139+
140+
Request searchRequest = new Request("POST", "/_search");
141+
searchRequest.setJsonEntity("""
142+
{
143+
"query": {
144+
"simple_query_string" : {
145+
"query": "foo",
146+
"fields": ["field"]
147+
}
148+
}
149+
}
150+
""");
151+
152+
String errorTriggeringIndex = "test2";
153+
try (var mockLog = MockLog.capture(SearchService.class)) {
154+
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
155+
156+
getRestClient().performRequest(searchRequest);
157+
mockLog.assertAllExpectationsMatched();
158+
}
159+
}
160+
161+
public void testNoLoggingInSearchFailingQueryErrorTraceTrue() throws IOException {
162+
int numShards = setupIndexWithDocs();
163+
164+
Request searchRequest = new Request("POST", "/_search");
165+
searchRequest.setJsonEntity("""
166+
{
167+
"query": {
168+
"simple_query_string" : {
169+
"query": "foo",
170+
"fields": ["field"]
171+
}
172+
}
173+
}
174+
""");
175+
176+
String errorTriggeringIndex = "test2";
177+
try (var mockLog = MockLog.capture(SearchService.class)) {
178+
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
179+
180+
searchRequest.addParameter("error_trace", "true");
181+
getRestClient().performRequest(searchRequest);
182+
mockLog.assertAllExpectationsMatched();
183+
}
184+
}
185+
186+
public void testLoggingInSearchFailingQueryErrorTraceFalse() throws IOException {
187+
int numShards = setupIndexWithDocs();
188+
189+
Request searchRequest = new Request("POST", "/_search");
190+
searchRequest.setJsonEntity("""
191+
{
192+
"query": {
193+
"simple_query_string" : {
194+
"query": "foo",
195+
"fields": ["field"]
196+
}
197+
}
198+
}
199+
""");
200+
201+
String errorTriggeringIndex = "test2";
202+
try (var mockLog = MockLog.capture(SearchService.class)) {
203+
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
204+
205+
searchRequest.addParameter("error_trace", "false");
206+
getRestClient().performRequest(searchRequest);
207+
mockLog.assertAllExpectationsMatched();
208+
}
209+
}
210+
111211
public void testMultiSearchFailingQueryErrorTraceDefault() throws IOException {
112212
setupIndexWithDocs();
113213

@@ -158,4 +258,73 @@ public void testMultiSearchFailingQueryErrorTraceFalse() throws IOException {
158258

159259
assertFalse(hasStackTrace.getAsBoolean());
160260
}
261+
262+
public void testLoggingInMultiSearchFailingQueryErrorTraceDefault() throws IOException {
263+
int numShards = setupIndexWithDocs();
264+
265+
XContentType contentType = XContentType.JSON;
266+
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
267+
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
268+
);
269+
Request searchRequest = new Request("POST", "/_msearch");
270+
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
271+
searchRequest.setEntity(
272+
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
273+
);
274+
275+
String errorTriggeringIndex = "test2";
276+
try (var mockLog = MockLog.capture(SearchService.class)) {
277+
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
278+
279+
getRestClient().performRequest(searchRequest);
280+
mockLog.assertAllExpectationsMatched();
281+
}
282+
}
283+
284+
public void testLoggingInMultiSearchFailingQueryErrorTraceTrue() throws IOException {
285+
int numShards = setupIndexWithDocs();
286+
287+
XContentType contentType = XContentType.JSON;
288+
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
289+
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
290+
);
291+
Request searchRequest = new Request("POST", "/_msearch");
292+
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
293+
searchRequest.setEntity(
294+
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
295+
);
296+
297+
searchRequest.addParameter("error_trace", "true");
298+
299+
String errorTriggeringIndex = "test2";
300+
try (var mockLog = MockLog.capture(SearchService.class)) {
301+
ErrorTraceHelper.addUnseenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
302+
303+
getRestClient().performRequest(searchRequest);
304+
mockLog.assertAllExpectationsMatched();
305+
}
306+
}
307+
308+
public void testLoggingInMultiSearchFailingQueryErrorTraceFalse() throws IOException {
309+
int numShards = setupIndexWithDocs();
310+
311+
XContentType contentType = XContentType.JSON;
312+
MultiSearchRequest multiSearchRequest = new MultiSearchRequest().add(
313+
new SearchRequest("test*").source(new SearchSourceBuilder().query(simpleQueryStringQuery("foo").field("field")))
314+
);
315+
Request searchRequest = new Request("POST", "/_msearch");
316+
byte[] requestBody = MultiSearchRequest.writeMultiLineFormat(multiSearchRequest, contentType.xContent());
317+
searchRequest.setEntity(
318+
new NByteArrayEntity(requestBody, ContentType.create(contentType.mediaTypeWithoutParameters(), (Charset) null))
319+
);
320+
searchRequest.addParameter("error_trace", "false");
321+
322+
String errorTriggeringIndex = "test2";
323+
try (var mockLog = MockLog.capture(SearchService.class)) {
324+
ErrorTraceHelper.addSeenLoggingExpectations(numShards, mockLog, errorTriggeringIndex);
325+
326+
getRestClient().performRequest(searchRequest);
327+
mockLog.assertAllExpectationsMatched();
328+
}
329+
}
161330
}

server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,8 +456,7 @@ public static void registerRequestHandler(TransportService transportService, Sea
456456
(request, channel, task) -> searchService.executeQueryPhase(
457457
request,
458458
(SearchShardTask) task,
459-
new ChannelActionListener<>(channel),
460-
channel.getVersion()
459+
new ChannelActionListener<>(channel)
461460
)
462461
);
463462
TransportActionProxy.registerProxyAction(transportService, QUERY_ID_ACTION_NAME, true, QuerySearchResult::new);
@@ -469,8 +468,7 @@ public static void registerRequestHandler(TransportService transportService, Sea
469468
(request, channel, task) -> searchService.executeQueryPhase(
470469
request,
471470
(SearchShardTask) task,
472-
new ChannelActionListener<>(channel),
473-
channel.getVersion()
471+
new ChannelActionListener<>(channel)
474472
)
475473
);
476474
TransportActionProxy.registerProxyAction(transportService, QUERY_SCROLL_ACTION_NAME, true, ScrollQuerySearchResult::new);

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.apache.lucene.search.TopDocs;
1919
import org.elasticsearch.ElasticsearchException;
2020
import org.elasticsearch.ExceptionsHelper;
21-
import org.elasticsearch.TransportVersion;
2221
import org.elasticsearch.action.ActionListener;
2322
import org.elasticsearch.action.ActionRunnable;
2423
import org.elasticsearch.action.ResolvedIndices;
@@ -156,6 +155,7 @@
156155
import java.util.function.Supplier;
157156

158157
import static org.elasticsearch.TransportVersions.ERROR_TRACE_IN_TRANSPORT_HEADER;
158+
import static org.elasticsearch.common.Strings.format;
159159
import static org.elasticsearch.core.TimeValue.timeValueHours;
160160
import static org.elasticsearch.core.TimeValue.timeValueMillis;
161161
import static org.elasticsearch.core.TimeValue.timeValueMinutes;
@@ -537,21 +537,27 @@ protected void doClose() {
537537
*
538538
* @param <T> the type of the response
539539
* @param listener the action listener to be wrapped
540-
* @param version channel version of the request
540+
* @param request the shard request being executed
541541
* @param threadPool with context where to write the new header
542+
* @param clusterService the cluster service
542543
* @return the wrapped action listener
543544
*/
544545
static <T> ActionListener<T> maybeWrapListenerForStackTrace(
545546
ActionListener<T> listener,
546-
TransportVersion version,
547-
ThreadPool threadPool
547+
ShardSearchRequest request,
548+
ThreadPool threadPool,
549+
ClusterService clusterService
548550
) {
549551
boolean header = true;
550-
if (version.onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
552+
if (request.getChannelVersion().onOrAfter(ERROR_TRACE_IN_TRANSPORT_HEADER) && threadPool.getThreadContext() != null) {
551553
header = Boolean.parseBoolean(threadPool.getThreadContext().getHeaderOrDefault("error_trace", "false"));
552554
}
553555
if (header == false) {
554556
return listener.delegateResponse((l, e) -> {
557+
logger.debug(
558+
() -> format("[%s]%s Clearing stack trace before transport:", clusterService.localNode().getId(), request.shardId()),
559+
e
560+
);
555561
ExceptionsHelper.unwrapCausesAndSuppressed(e, err -> {
556562
err.setStackTrace(EMPTY_STACK_TRACE_ARRAY);
557563
return false;
@@ -563,7 +569,7 @@ static <T> ActionListener<T> maybeWrapListenerForStackTrace(
563569
}
564570

565571
public void executeDfsPhase(ShardSearchRequest request, SearchShardTask task, ActionListener<SearchPhaseResult> listener) {
566-
listener = maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool);
572+
listener = maybeWrapListenerForStackTrace(listener, request, threadPool, clusterService);
567573
final IndexShard shard = getShard(request);
568574
rewriteAndFetchShardRequest(shard, request, listener.delegateFailure((l, rewritten) -> {
569575
// fork the execution in the search thread pool
@@ -607,7 +613,7 @@ public void executeQueryPhase(ShardSearchRequest request, CancellableTask task,
607613
rewriteAndFetchShardRequest(
608614
shard,
609615
request,
610-
maybeWrapListenerForStackTrace(listener, request.getChannelVersion(), threadPool).delegateFailure((l, orig) -> {
616+
maybeWrapListenerForStackTrace(listener, request, threadPool, clusterService).delegateFailure((l, orig) -> {
611617
// check if we can shortcut the query phase entirely.
612618
if (orig.canReturnNullResponseIfMatchNoDocs()) {
613619
assert orig.scroll() == null;
@@ -805,9 +811,9 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, Cancella
805811
}
806812

807813
public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShardTask task, ActionListener<RankFeatureResult> listener) {
808-
listener = maybeWrapListenerForStackTrace(listener, request.getShardSearchRequest().getChannelVersion(), threadPool);
809814
final ReaderContext readerContext = findReaderContext(request.contextId(), request);
810815
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.getShardSearchRequest());
816+
listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
811817
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
812818
runAsync(getExecutor(readerContext.indexShard()), () -> {
813819
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.RANK_FEATURE, false)) {
@@ -853,11 +859,11 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon
853859
public void executeQueryPhase(
854860
InternalScrollSearchRequest request,
855861
SearchShardTask task,
856-
ActionListener<ScrollQuerySearchResult> listener,
857-
TransportVersion version
862+
ActionListener<ScrollQuerySearchResult> listener
858863
) {
859-
listener = maybeWrapListenerForStackTrace(listener, version, threadPool);
860864
final LegacyReaderContext readerContext = (LegacyReaderContext) findReaderContext(request.contextId(), request);
865+
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
866+
listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
861867
final Releasable markAsUsed;
862868
try {
863869
markAsUsed = readerContext.markAsUsed(getScrollKeepAlive(request.scroll()));
@@ -867,7 +873,6 @@ public void executeQueryPhase(
867873
throw e;
868874
}
869875
runAsync(getExecutor(readerContext.indexShard()), () -> {
870-
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
871876
try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, false);) {
872877
var opsListener = searchContext.indexShard().getSearchOperationListener();
873878
final long beforeQueryTime = System.nanoTime();
@@ -899,15 +904,10 @@ public void executeQueryPhase(
899904
* It is the responsibility of the caller to ensure that the ref count is correctly decremented
900905
* when the object is no longer needed.
901906
*/
902-
public void executeQueryPhase(
903-
QuerySearchRequest request,
904-
SearchShardTask task,
905-
ActionListener<QuerySearchResult> listener,
906-
TransportVersion version
907-
) {
908-
listener = maybeWrapListenerForStackTrace(listener, version, threadPool);
907+
public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, ActionListener<QuerySearchResult> listener) {
909908
final ReaderContext readerContext = findReaderContext(request.contextId(), request.shardSearchRequest());
910909
final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(request.shardSearchRequest());
910+
listener = maybeWrapListenerForStackTrace(listener, shardSearchRequest, threadPool, clusterService);
911911
final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
912912
rewriteAndFetchShardRequest(readerContext.indexShard(), shardSearchRequest, listener.delegateFailure((l, rewritten) -> {
913913
// fork the execution in the search thread pool

server/src/test/java/org/elasticsearch/search/SearchServiceSingleNodeTests.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2630,8 +2630,7 @@ public void testDfsQueryPhaseRewrite() {
26302630
service.executeQueryPhase(
26312631
new QuerySearchRequest(null, context.id(), request, new AggregatedDfs(Map.of(), Map.of(), 10)),
26322632
new SearchShardTask(42L, "", "", "", null, emptyMap()),
2633-
plainActionFuture,
2634-
TransportVersion.current()
2633+
plainActionFuture
26352634
);
26362635

26372636
plainActionFuture.actionGet();

0 commit comments

Comments
 (0)