Skip to content

Commit 24c43bb

Browse files
authored
Fix ArrayIndexOutOfBoundsException in fetch phase with partial results (#144385) (#145002)
When a search fetch phase times out on a shard with allow_partial_search_results: true, the coordinating node can hit an ArrayIndexOutOfBoundsException in SearchPhaseController.getHits() or SearchPhaseController.mergeSuggest(). Two defects cause this. First, in FetchPhaseDocsIterator.iterate(), the timeout path used System.arraycopy(searchHits, 0, partial, 0, i) to build the partial result. Because docs are iterated in doc-id order but stored at their original (score-sorted) positions, this copy produced a corrupted array: it included nulls at unfilled positions and dropped valid hits stored beyond index i. This is fixed by replacing the arraycopy with a stripNulls method that returns a compact array containing only the successfully fetched hits. Second, in SearchPhaseController.getHits() and mergeSuggest(), the query phase promises N docs from a shard, but after a timeout the fetch phase may return fewer. The merge loop used a counter to index into the fetch result array without checking bounds, which triggered the ArrayIndexOutOfBoundsException. This is fixed by adding a bounds check that skips entries when the counter exceeds the number of available hits.
1 parent 3c680f2 commit 24c43bb

File tree

5 files changed

+155
-7
lines changed

5 files changed

+155
-7
lines changed

docs/changelog/144385.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
area: Search
2+
issues:
3+
- 140495
4+
pr: 144385
5+
summary: Fix `ArrayIndexOutOfBoundsException` in fetch phase with partial results
6+
type: bug

server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,10 @@ private static void mergeSuggest(
263263
}
264264
FetchSearchResult fetchResult = searchResultProvider.fetchResult();
265265
final int index = fetchResult.counterGetAndIncrement();
266-
assert index < fetchResult.hits().getHits().length
267-
: "not enough hits fetched. index [" + index + "] length: " + fetchResult.hits().getHits().length;
266+
if (index >= fetchResult.hits().getHits().length) {
267+
// the fetch phase on this shard timed out and returned partial results
268+
continue;
269+
}
268270
SearchHit hit = fetchResult.hits().getHits()[index];
269271
CompletionSuggestion.Entry.Option suggestOption = suggestionOptions.get(scoreDocIndex - currentOffset);
270272
hit.score(shardDoc.score);
@@ -316,8 +318,10 @@ private static SearchHits getHits(
316318
}
317319
FetchSearchResult fetchResult = fetchResultProvider.fetchResult();
318320
final int index = fetchResult.counterGetAndIncrement();
319-
assert index < fetchResult.hits().getHits().length
320-
: "not enough hits fetched. index [" + index + "] length: " + fetchResult.hits().getHits().length;
321+
if (index >= fetchResult.hits().getHits().length) {
322+
// the fetch phase on this shard timed out and returned partial results
323+
continue;
324+
}
321325
SearchHit searchHit = fetchResult.hits().getHits()[index];
322326
searchHit.shard(fetchResult.getSearchShardTarget());
323327
if (shardDoc instanceof RankDoc) {

server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.io.IOException;
2323
import java.util.Arrays;
24+
import java.util.Objects;
2425

2526
/**
2627
* Given a set of doc ids and an index reader, sorts the docs by id, splits the sorted
@@ -93,9 +94,7 @@ public final SearchHit[] iterate(
9394
}
9495
SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult);
9596
assert allowPartialResults;
96-
SearchHit[] partialSearchHits = new SearchHit[i];
97-
System.arraycopy(searchHits, 0, partialSearchHits, 0, i);
98-
return partialSearchHits;
97+
return stripNulls(searchHits);
9998
}
10099
}
101100
} catch (SearchTimeoutException e) {
@@ -107,6 +106,15 @@ public final SearchHit[] iterate(
107106
return searchHits;
108107
}
109108

109+
private static SearchHit[] stripNulls(SearchHit[] searchHits) {
110+
for (SearchHit hit : searchHits) {
111+
if (hit == null) {
112+
return Arrays.stream(searchHits).filter(Objects::nonNull).toArray(SearchHit[]::new);
113+
}
114+
}
115+
return searchHits;
116+
}
117+
110118
private static void purgeSearchHits(SearchHit[] searchHits) {
111119
for (SearchHit searchHit : searchHits) {
112120
if (searchHit != null) {

server/src/test/java/org/elasticsearch/action/search/SearchPhaseControllerTests.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,81 @@ public void testFailConsumeAggs() throws Exception {
14171417
}
14181418
}
14191419

1420+
public void testMergeWithPartialFetchResults() {
1421+
int nShards = 3;
1422+
int hitsPerShard = 5;
1423+
AtomicArray<SearchPhaseResult> queryResults = new AtomicArray<>(nShards);
1424+
for (int shardIndex = 0; shardIndex < nShards; shardIndex++) {
1425+
SearchShardTarget target = new SearchShardTarget("", new ShardId("", "", shardIndex), null);
1426+
QuerySearchResult qsr = new QuerySearchResult(new ShardSearchContextId("", shardIndex), target, null);
1427+
ScoreDoc[] scoreDocs = new ScoreDoc[hitsPerShard];
1428+
for (int i = 0; i < hitsPerShard; i++) {
1429+
scoreDocs[i] = new ScoreDoc(i, hitsPerShard - i);
1430+
}
1431+
qsr.topDocs(new TopDocsAndMaxScore(new TopDocs(new TotalHits(hitsPerShard, Relation.EQUAL_TO), scoreDocs), hitsPerShard), null);
1432+
qsr.size(hitsPerShard * nShards);
1433+
qsr.setShardIndex(shardIndex);
1434+
queryResults.set(shardIndex, qsr);
1435+
}
1436+
try {
1437+
TopDocsStats topDocsStats = new TopDocsStats(SearchContext.TRACK_TOTAL_HITS_ACCURATE);
1438+
List<TopDocs> bufferedTopDocs = new ArrayList<>();
1439+
for (SearchPhaseResult result : queryResults.asList()) {
1440+
QuerySearchResult qsr = result.queryResult();
1441+
TopDocsAndMaxScore td = qsr.consumeTopDocs();
1442+
topDocsStats.add(td, qsr.searchTimedOut(), qsr.terminatedEarly());
1443+
SearchPhaseController.setShardIndex(td.topDocs, qsr.getShardIndex());
1444+
bufferedTopDocs.add(td.topDocs);
1445+
}
1446+
SearchPhaseController.ReducedQueryPhase reducedQueryPhase = SearchPhaseController.reducedQueryPhase(
1447+
queryResults.asList(),
1448+
InternalAggregations.EMPTY,
1449+
bufferedTopDocs,
1450+
topDocsStats,
1451+
0,
1452+
false,
1453+
null
1454+
);
1455+
ScoreDoc[] scoreDocs = reducedQueryPhase.sortedTopDocs().scoreDocs();
1456+
assertThat(scoreDocs.length, greaterThan(0));
1457+
1458+
AtomicArray<SearchPhaseResult> fetchResults = new AtomicArray<>(nShards);
1459+
for (int shardIndex = 0; shardIndex < nShards; shardIndex++) {
1460+
SearchShardTarget target = new SearchShardTarget("", new ShardId("", "", shardIndex), null);
1461+
FetchSearchResult fsr = new FetchSearchResult(new ShardSearchContextId("", shardIndex), target);
1462+
int shardHitCount = 0;
1463+
for (ScoreDoc sd : scoreDocs) {
1464+
if (sd.shardIndex == shardIndex) {
1465+
shardHitCount++;
1466+
}
1467+
}
1468+
// simulate a fetch timeout: shard 0 returns fewer hits than expected
1469+
int fetchedCount = (shardIndex == 0 && shardHitCount > 0) ? shardHitCount - 1 : shardHitCount;
1470+
SearchHit[] hits = new SearchHit[fetchedCount];
1471+
int idx = 0;
1472+
for (ScoreDoc sd : scoreDocs) {
1473+
if (sd.shardIndex == shardIndex && idx < fetchedCount) {
1474+
hits[idx++] = SearchHit.unpooled(sd.doc, "");
1475+
}
1476+
}
1477+
fsr.shardResult(SearchHits.unpooled(hits, new TotalHits(fetchedCount, Relation.EQUAL_TO), Float.NaN), null);
1478+
fetchResults.set(shardIndex, fsr);
1479+
}
1480+
try (SearchResponseSections mergedResponse = SearchPhaseController.merge(false, reducedQueryPhase, fetchResults)) {
1481+
// the merged response should not contain more hits than available fetch results
1482+
assertThat(mergedResponse.hits().getHits().length, lessThan(scoreDocs.length));
1483+
for (SearchHit hit : mergedResponse.hits().getHits()) {
1484+
assertNotNull(hit);
1485+
assertNotNull(hit.getShard());
1486+
}
1487+
} finally {
1488+
fetchResults.asList().forEach(RefCounted::decRef);
1489+
}
1490+
} finally {
1491+
queryResults.asList().forEach(RefCounted::decRef);
1492+
}
1493+
}
1494+
14201495
private static class AssertingCircuitBreaker extends NoopCircuitBreaker {
14211496
private final AtomicBoolean shouldBreak = new AtomicBoolean(false);
14221497

server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
import org.apache.lucene.index.LeafReaderContext;
1717
import org.apache.lucene.store.Directory;
1818
import org.apache.lucene.tests.index.RandomIndexWriter;
19+
import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy;
1920
import org.elasticsearch.search.SearchHit;
21+
import org.elasticsearch.search.internal.ContextIndexSearcher;
2022
import org.elasticsearch.search.query.QuerySearchResult;
2123
import org.elasticsearch.test.ESTestCase;
2224

@@ -28,6 +30,7 @@
2830
import static org.hamcrest.Matchers.containsString;
2931
import static org.hamcrest.Matchers.equalTo;
3032
import static org.hamcrest.Matchers.greaterThan;
33+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
3134
import static org.hamcrest.Matchers.instanceOf;
3235
import static org.hamcrest.Matchers.lessThan;
3336

@@ -137,6 +140,58 @@ protected SearchHit nextDoc(int doc) {
137140
directory.close();
138141
}
139142

143+
public void testTimeoutReturnsCompactPartialResults() throws IOException {
144+
int docCount = 400;
145+
Directory directory = newDirectory();
146+
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
147+
for (int i = 0; i < docCount; i++) {
148+
Document doc = new Document();
149+
doc.add(new StringField("field", "foo", Field.Store.NO));
150+
writer.addDocument(doc);
151+
if (i % 50 == 0) {
152+
writer.commit();
153+
}
154+
}
155+
writer.commit();
156+
IndexReader reader = writer.getReader();
157+
writer.close();
158+
159+
ContextIndexSearcher searcher = new ContextIndexSearcher(reader, null, null, TrivialQueryCachingPolicy.NEVER, randomBoolean());
160+
161+
// deliberately unsorted doc ids so that the doc-id-sorted iteration order
162+
// differs from the original order
163+
int[] docs = new int[] { 250, 10, 150, 50, 300, 100, 200, 350 };
164+
// in doc-id order: 10, 50, 100, 150, 200, ... timeout at doc 200
165+
final int timeoutAfterDocId = 200;
166+
167+
FetchPhaseDocsIterator it = new FetchPhaseDocsIterator() {
168+
@Override
169+
protected void setNextReader(LeafReaderContext ctx, int[] docsInLeaf) {}
170+
171+
@Override
172+
protected SearchHit nextDoc(int doc) {
173+
if (doc == timeoutAfterDocId) {
174+
searcher.throwTimeExceededException();
175+
}
176+
return new SearchHit(doc);
177+
}
178+
};
179+
180+
SearchHit[] hits = it.iterate(null, reader, docs, true, new QuerySearchResult());
181+
182+
// the returned array is compact — no null entries, shorter than input
183+
assertThat(hits.length, greaterThan(0));
184+
assertThat(hits.length, lessThan(docs.length));
185+
for (SearchHit hit : hits) {
186+
assertNotNull(hit);
187+
assertThat(hit.docId(), greaterThanOrEqualTo(0));
188+
hit.decRef();
189+
}
190+
191+
reader.close();
192+
directory.close();
193+
}
194+
140195
private static int[] randomDocIds(int maxDoc) {
141196
List<Integer> integers = new ArrayList<>();
142197
int v = 0;

0 commit comments

Comments
 (0)