From 94c60c42b03dcd4182b47de1c17df6f1a05609e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 21 Jul 2025 13:03:00 +0200 Subject: [PATCH] Tests: Add logging to FieldSortIT (#131558) In order to better understand the infrequent failures in #129445 and in the hope to reproduce the issue better, this PR adds logging around which shards and nodes documents end up in FieldSortIT#testSortMixedFieldTypes and increases the log level for this test suite in the o.e.search packages to TRACE and in some action.search packages to DEBUG to better understand where exceptions are thrown and to better trace how resources are released after that. Relates to #129445 --- .../search/sort/FieldSortIT.java | 27 +++++++++++++++- .../action/search/SearchPhaseController.java | 31 ++++++++++++------- .../SearchQueryThenFetchAsyncAction.java | 2 ++ 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java index 8509f883dfecd..5778447b91bfd 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.sort; +import org.apache.http.util.EntityUtils; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; @@ -20,6 +21,9 @@ import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.RestClient; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; @@ -36,6 +40,7 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalSettingsPlugin; +import org.elasticsearch.test.junit.annotations.TestIssueLogging; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; @@ -82,6 +87,12 @@ import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; +@TestIssueLogging( + issueUrl = "https://github.com/elastic/elasticsearch/issues/129445", + value = "org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction:DEBUG," + + "org.elasticsearch.action.search.SearchPhaseController:DEBUG," + + "org.elasticsearch.search:TRACE" +) public class FieldSortIT extends ESIntegTestCase { public static class CustomScriptPlugin extends MockScriptPlugin { @Override @@ -110,6 +121,10 @@ protected Collection> nodePlugins() { return Arrays.asList(InternalSettingsPlugin.class, CustomScriptPlugin.class); } + protected boolean addMockHttpTransport() { + return false; + } + public void testIssue8226() { int numIndices = between(5, 10); final boolean useMapping = randomBoolean(); @@ -2242,7 +2257,7 @@ public void testLongSortOptimizationCorrectResults() { ); } - public void testSortMixedFieldTypes() { + public void testSortMixedFieldTypes() throws IOException { assertAcked( prepareCreate("index_long").setMapping("foo", "type=long"), prepareCreate("index_integer").setMapping("foo", "type=integer"), @@ -2256,6 +2271,16 @@ public void testSortMixedFieldTypes() { prepareIndex("index_keyword").setId("1").setSource("foo", "123").get(); refresh(); + // for debugging, we try to see where the documents are located + try (RestClient restClient = createRestClient()) { + Request checkShardsRequest = new Request( + "GET", + "/_cat/shards/index_long,index_double,index_keyword?format=json&h=index,node,shard,prirep,state,docs,index" + ); + Response response = restClient.performRequest(checkShardsRequest); + logger.info("FieldSortIT#testSortMixedFieldTypes document distribution: " + EntityUtils.toString(response.getEntity())); + } + { // mixing long and integer types is ok, as we convert integer sort to long sort assertNoFailures(prepareSearch("index_long", "index_integer").addSort(new FieldSortBuilder("foo")).setSize(10)); } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index 74c988ef5ffd5..e787f4fede46f 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -9,6 +9,8 @@ package org.elasticsearch.action.search; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; @@ -72,6 +74,8 @@ public final class SearchPhaseController { + private static final Logger logger = LogManager.getLogger(SearchPhaseController.class); + private final BiFunction< Supplier, AggregatorFactories.Builder, @@ -153,17 +157,22 @@ static TopDocs mergeTopDocs(Collection results, int topN, int from) { return topDocs; } final TopDocs mergedTopDocs; - if (topDocs instanceof TopFieldGroups firstTopDocs) { - final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); - TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]); - mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false); - } else if (topDocs instanceof TopFieldDocs firstTopDocs) { - TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]); - final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); - mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); - } else { - final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]); - mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs); + try { + if (topDocs instanceof TopFieldGroups firstTopDocs) { + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); + TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]); + mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false); + } else if (topDocs instanceof TopFieldDocs firstTopDocs) { + TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]); + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); + mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); + } else { + final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]); + mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs); + } + } catch (IllegalArgumentException e) { + logger.debug("Failed to merge top docs: ", e); + throw e; } return mergedTopDocs; } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index 8b6e7a4a132da..8827ed239829c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -454,6 +454,7 @@ protected void doRun(Map shardIndexMap) { executeAsSingleRequest(routing, request.shards.get(0)); return; } + String nodeId = routing.nodeId(); final Transport.Connection connection; try { connection = getConnection(routing.clusterAlias(), routing.nodeId()); @@ -503,6 +504,7 @@ public void handleResponse(NodeQueryResponse response) { @Override public void handleException(TransportException e) { Exception cause = (Exception) ExceptionsHelper.unwrapCause(e); + logger.debug("handling node search exception coming from [" + nodeId + "]", cause); if (e instanceof SendRequestTransportException || cause instanceof TaskCancelledException) { // two possible special cases here where we do not want to fail the phase: // failure to send out the request -> handle things the same way a shard would fail with unbatched execution