Skip to content

Commit de97c30

Browse files
committed
Add logging to FieldSortIT
In order to better understand the infrequent failures in #129445 and in the hope to reproduce the issue better, this PR adds logging around which shards and nodes documents end up in FieldSortIT#testSortMixedFieldTypes and increases the log level for this test suite in the o.e.search packages to TRACE and in some action.search packages to DEBUG to better understand where exceptions are thrown and to better trace how resources are released after that. Relates to #129445
1 parent f664cf5 commit de97c30

File tree

3 files changed

+57
-12
lines changed

3 files changed

+57
-12
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.search.sort;
1111

12+
import org.apache.http.util.EntityUtils;
1213
import org.apache.lucene.tests.util.TestUtil;
1314
import org.apache.lucene.util.BytesRef;
1415
import org.apache.lucene.util.UnicodeUtil;
@@ -20,7 +21,11 @@
2021
import org.elasticsearch.action.search.SearchRequestBuilder;
2122
import org.elasticsearch.action.search.SearchResponse;
2223
import org.elasticsearch.action.search.ShardSearchFailure;
24+
import org.elasticsearch.client.Request;
25+
import org.elasticsearch.client.Response;
26+
import org.elasticsearch.client.RestClient;
2327
import org.elasticsearch.cluster.metadata.IndexMetadata;
28+
import org.elasticsearch.common.network.NetworkModule;
2429
import org.elasticsearch.common.settings.Settings;
2530
import org.elasticsearch.core.Strings;
2631
import org.elasticsearch.index.fielddata.ScriptDocValues;
@@ -36,6 +41,8 @@
3641
import org.elasticsearch.search.SearchHits;
3742
import org.elasticsearch.test.ESIntegTestCase;
3843
import org.elasticsearch.test.InternalSettingsPlugin;
44+
import org.elasticsearch.test.junit.annotations.TestIssueLogging;
45+
import org.elasticsearch.transport.netty4.Netty4Plugin;
3946
import org.elasticsearch.xcontent.XContentBuilder;
4047
import org.elasticsearch.xcontent.XContentFactory;
4148
import org.elasticsearch.xcontent.XContentType;
@@ -84,6 +91,12 @@
8491
import static org.hamcrest.Matchers.not;
8592
import static org.hamcrest.Matchers.nullValue;
8693

94+
@TestIssueLogging(
95+
issueUrl = "https://github.com/elastic/elasticsearch/issues/129445",
96+
value = "org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction:DEBUG,"
97+
+ "org.elasticsearch.action.search.SearchPhaseController:DEBUG,"
98+
+ "org.elasticsearch.search:TRACE"
99+
)
87100
public class FieldSortIT extends ESIntegTestCase {
88101
public static class CustomScriptPlugin extends MockScriptPlugin {
89102
@Override
@@ -112,6 +125,17 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
112125
return Arrays.asList(InternalSettingsPlugin.class, CustomScriptPlugin.class);
113126
}
114127

128+
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
129+
return Settings.builder()
130+
.put(super.nodeSettings(nodeOrdinal, otherSettings))
131+
.put(NetworkModule.HTTP_TYPE_KEY, Netty4Plugin.NETTY_HTTP_TRANSPORT_NAME)
132+
.build();
133+
}
134+
135+
protected boolean addMockHttpTransport() {
136+
return false;
137+
}
138+
115139
public void testIssue8226() {
116140
int numIndices = between(5, 10);
117141
final boolean useMapping = randomBoolean();
@@ -2145,7 +2169,7 @@ public void testLongSortOptimizationCorrectResults() {
21452169
);
21462170
}
21472171

2148-
public void testSortMixedFieldTypes() {
2172+
public void testSortMixedFieldTypes() throws IOException {
21492173
assertAcked(
21502174
prepareCreate("index_long").setMapping("foo", "type=long"),
21512175
prepareCreate("index_integer").setMapping("foo", "type=integer"),
@@ -2159,6 +2183,16 @@ public void testSortMixedFieldTypes() {
21592183
prepareIndex("index_keyword").setId("1").setSource("foo", "123").get();
21602184
refresh();
21612185

2186+
// for debugging, we try to see where the documents are located
2187+
try (RestClient restClient = createRestClient()) {
2188+
Request checkShardsRequest = new Request(
2189+
"GET",
2190+
"/_cat/shards/index_long,index_double,index_keyword?format=json&h=index,node,shard,prirep,state,docs,index"
2191+
);
2192+
Response response = restClient.performRequest(checkShardsRequest);
2193+
logger.info("FieldSortIT#testSortMixedFieldTypes document distribution: " + EntityUtils.toString(response.getEntity()));
2194+
}
2195+
21622196
{ // mixing long and integer types is ok, as we convert integer sort to long sort
21632197
assertNoFailures(prepareSearch("index_long", "index_integer").addSort(new FieldSortBuilder("foo")).setSize(10));
21642198
}

server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
package org.elasticsearch.action.search;
1111

12+
import org.apache.logging.log4j.LogManager;
13+
import org.apache.logging.log4j.Logger;
1214
import org.apache.lucene.search.FieldDoc;
1315
import org.apache.lucene.search.ScoreDoc;
1416
import org.apache.lucene.search.Sort;
@@ -72,6 +74,8 @@
7274

7375
public final class SearchPhaseController {
7476

77+
private static final Logger logger = LogManager.getLogger(SearchPhaseController.class);
78+
7579
private final BiFunction<
7680
Supplier<Boolean>,
7781
AggregatorFactories.Builder,
@@ -153,17 +157,22 @@ static TopDocs mergeTopDocs(List<TopDocs> results, int topN, int from) {
153157
return topDocs;
154158
}
155159
final TopDocs mergedTopDocs;
156-
if (topDocs instanceof TopFieldGroups firstTopDocs) {
157-
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
158-
TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]);
159-
mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false);
160-
} else if (topDocs instanceof TopFieldDocs firstTopDocs) {
161-
TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]);
162-
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
163-
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
164-
} else {
165-
final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]);
166-
mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
160+
try {
161+
if (topDocs instanceof TopFieldGroups firstTopDocs) {
162+
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
163+
TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]);
164+
mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false);
165+
} else if (topDocs instanceof TopFieldDocs firstTopDocs) {
166+
TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]);
167+
final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields);
168+
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
169+
} else {
170+
final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]);
171+
mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
172+
}
173+
} catch (IllegalArgumentException e) {
174+
logger.debug("Failed to merge top docs: ", e);
175+
throw e;
167176
}
168177
return mergedTopDocs;
169178
}

server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,7 @@ protected void doRun(Map<SearchShardIterator, Integer> shardIndexMap) {
457457
executeAsSingleRequest(routing, request.shards.getFirst());
458458
return;
459459
}
460+
String nodeId = routing.nodeId();
460461
final Transport.Connection connection;
461462
try {
462463
connection = getConnection(routing.clusterAlias(), routing.nodeId());
@@ -508,6 +509,7 @@ public void handleResponse(NodeQueryResponse response) {
508509
@Override
509510
public void handleException(TransportException e) {
510511
Exception cause = (Exception) ExceptionsHelper.unwrapCause(e);
512+
logger.debug("handling node search exception coming from [" + nodeId + "]", cause);
511513
if (e instanceof SendRequestTransportException || cause instanceof TaskCancelledException) {
512514
// two possible special cases here where we do not want to fail the phase:
513515
// failure to send out the request -> handle things the same way a shard would fail with unbatched execution

0 commit comments

Comments
 (0)