From 7351544f2122cb70f3de1c2c4771cdbcf93112e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 11 Jul 2025 16:17:13 +0200 Subject: [PATCH 1/2] Add logging to FieldSortIT In order to better understand the infrequent failures in #129445 and in the hope to reproduce the issue better, this PR adds logging around which shards and nodes documents end up in FieldSortIT#testSortMixedFieldTypes and increases the log level for this test suite in the o.e.search packages to TRACE and in some action.search packages to DEBUG to better understand where exceptions are thrown and to better trace how resources are released after that. Relates to #129445 --- .../search/sort/FieldSortIT.java | 36 ++++++++++++++++++- .../action/search/SearchPhaseController.java | 31 ++++++++++------ .../SearchQueryThenFetchAsyncAction.java | 2 ++ 3 files changed, 57 insertions(+), 12 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java index 2cd610f204d9e..9dc0218ca103a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.sort; +import org.apache.http.util.EntityUtils; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; @@ -20,7 +21,11 @@ import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.RestClient; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.network.NetworkModule; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.index.fielddata.ScriptDocValues; @@ -36,6 +41,8 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalSettingsPlugin; +import org.elasticsearch.test.junit.annotations.TestIssueLogging; +import org.elasticsearch.transport.netty4.Netty4Plugin; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; @@ -84,6 +91,12 @@ import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; +@TestIssueLogging( + issueUrl = "https://github.com/elastic/elasticsearch/issues/129445", + value = "org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction:DEBUG," + + "org.elasticsearch.action.search.SearchPhaseController:DEBUG," + + "org.elasticsearch.search:TRACE" +) public class FieldSortIT extends ESIntegTestCase { public static class CustomScriptPlugin extends MockScriptPlugin { @Override @@ -112,6 +125,17 @@ protected Collection> nodePlugins() { return Arrays.asList(InternalSettingsPlugin.class, CustomScriptPlugin.class); } + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(NetworkModule.HTTP_TYPE_KEY, Netty4Plugin.NETTY_HTTP_TRANSPORT_NAME) + .build(); + } + + protected boolean addMockHttpTransport() { + return false; + } + public void testIssue8226() { int numIndices = between(5, 10); final boolean useMapping = randomBoolean(); @@ -2145,7 +2169,7 @@ public void testLongSortOptimizationCorrectResults() { ); } - public void testSortMixedFieldTypes() { + public void testSortMixedFieldTypes() throws IOException { assertAcked( prepareCreate("index_long").setMapping("foo", "type=long"), prepareCreate("index_integer").setMapping("foo", "type=integer"), @@ -2159,6 +2183,16 @@ public void testSortMixedFieldTypes() { prepareIndex("index_keyword").setId("1").setSource("foo", "123").get(); refresh(); + // for debugging, we try to see where the documents are located + try (RestClient restClient = createRestClient()) { + Request checkShardsRequest = new Request( + "GET", + "/_cat/shards/index_long,index_double,index_keyword?format=json&h=index,node,shard,prirep,state,docs,index" + ); + Response response = restClient.performRequest(checkShardsRequest); + logger.info("FieldSortIT#testSortMixedFieldTypes document distribution: " + EntityUtils.toString(response.getEntity())); + } + { // mixing long and integer types is ok, as we convert integer sort to long sort assertNoFailures(prepareSearch("index_long", "index_integer").addSort(new FieldSortBuilder("foo")).setSize(10)); } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index 7339135fe93dc..2df4c05722908 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -9,6 +9,8 @@ package org.elasticsearch.action.search; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; @@ -72,6 +74,8 @@ public final class SearchPhaseController { + private static final Logger logger = LogManager.getLogger(SearchPhaseController.class); + private final BiFunction< Supplier, AggregatorFactories.Builder, @@ -153,17 +157,22 @@ static TopDocs mergeTopDocs(List results, int topN, int from) { return topDocs; } final TopDocs mergedTopDocs; - if (topDocs instanceof TopFieldGroups firstTopDocs) { - final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); - TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]); - mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false); - } else if (topDocs instanceof TopFieldDocs firstTopDocs) { - TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]); - final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); - mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); - } else { - final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]); - mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs); + try { + if (topDocs instanceof TopFieldGroups firstTopDocs) { + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); + TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]); + mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false); + } else if (topDocs instanceof TopFieldDocs firstTopDocs) { + TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]); + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); + mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); + } else { + final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]); + mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs); + } + } catch (IllegalArgumentException e) { + logger.debug("Failed to merge top docs: ", e); + throw e; } return mergedTopDocs; } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index f0be39208c902..8d763698c63c0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -457,6 +457,7 @@ protected void doRun(Map shardIndexMap) { executeAsSingleRequest(routing, request.shards.getFirst()); return; } + String nodeId = routing.nodeId(); final Transport.Connection connection; try { connection = getConnection(routing.clusterAlias(), routing.nodeId()); @@ -508,6 +509,7 @@ public void handleResponse(NodeQueryResponse response) { @Override public void handleException(TransportException e) { Exception cause = (Exception) ExceptionsHelper.unwrapCause(e); + logger.debug("handling node search exception coming from [" + nodeId + "]", cause); if (e instanceof SendRequestTransportException || cause instanceof TaskCancelledException) { // two possible special cases here where we do not want to fail the phase: // failure to send out the request -> handle things the same way a shard would fail with unbatched execution From cd1dd08596a3e7236089702f6183bede4ab9c020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 21 Jul 2025 10:51:48 +0200 Subject: [PATCH 2/2] Remove node settings --- .../java/org/elasticsearch/search/sort/FieldSortIT.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java index 9dc0218ca103a..97da362eebe82 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java @@ -25,7 +25,6 @@ import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.common.network.NetworkModule; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.index.fielddata.ScriptDocValues; @@ -42,7 +41,6 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalSettingsPlugin; import org.elasticsearch.test.junit.annotations.TestIssueLogging; -import org.elasticsearch.transport.netty4.Netty4Plugin; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; @@ -125,13 +123,6 @@ protected Collection> nodePlugins() { return Arrays.asList(InternalSettingsPlugin.class, CustomScriptPlugin.class); } - protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { - return Settings.builder() - .put(super.nodeSettings(nodeOrdinal, otherSettings)) - .put(NetworkModule.HTTP_TYPE_KEY, Netty4Plugin.NETTY_HTTP_TRANSPORT_NAME) - .build(); - } - protected boolean addMockHttpTransport() { return false; }