Skip to content

Commit 5634676

Browse files
authored
Conditionally use sequential stored field reader in LuceneSyntheticSourceChangesSnapshot (#121636) (#122202)
Improve LuceneSyntheticSourceChangesSnapshot by using a sequential stored field reader when the requested doc ids are dense. This is done by computing the doc ids for which recovery source needs to be synthesized. If the requested doc ids are dense and monotonically increasing, a sequential stored field reader is used, which provides recovery source for many documents without repeatedly decompressing the same block of stored fields.
1 parent d5d7937 commit 5634676

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

server/src/main/java/org/elasticsearch/index/engine/LuceneSyntheticSourceChangesSnapshot.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
package org.elasticsearch.index.engine;
1111

12+
import com.carrotsearch.hppc.IntArrayList;
13+
1214
import org.apache.lucene.index.LeafReaderContext;
1315
import org.apache.lucene.search.FieldDoc;
1416
import org.apache.lucene.search.ScoreDoc;
@@ -191,8 +193,28 @@ private Translog.Operation[] loadDocuments(List<SearchRecord> documentRecords) t
191193
maxDoc = leafReaderContext.reader().maxDoc();
192194
} while (docRecord.docID() >= docBase + maxDoc);
193195

194-
leafFieldLoader = storedFieldLoader.getLoader(leafReaderContext, null);
195-
leafSourceLoader = sourceLoader.leaf(leafReaderContext.reader(), null);
196+
// TODO: instead of building an array, consider just checking whether doc ids are dense.
197+
// Note: field loaders would then lose the ability to optionally load values eagerly.
198+
IntArrayList nextDocIds = new IntArrayList();
199+
for (int j = i; j < documentRecords.size(); j++) {
200+
var record = documentRecords.get(j);
201+
if (record.isTombstone()) {
202+
continue;
203+
}
204+
int docID = record.docID();
205+
if (docID >= docBase + maxDoc) {
206+
break;
207+
}
208+
int segmentDocID = docID - docBase;
209+
nextDocIds.add(segmentDocID);
210+
}
211+
212+
// This computed doc ids array is used by the stored field loader as a heuristic to determine whether to use a sequential
213+
// stored field reader (which bulk loads stored fields and avoids decompressing the same blocks multiple times). For
214+
// the source loader, it is also used as a heuristic for bulk reading doc values (e.g. SingletonDocValuesLoader).
215+
int[] nextDocIdArray = nextDocIds.toArray();
216+
leafFieldLoader = storedFieldLoader.getLoader(leafReaderContext, nextDocIdArray);
217+
leafSourceLoader = sourceLoader.leaf(leafReaderContext.reader(), nextDocIdArray);
196218
setNextSourceMetadataReader(leafReaderContext);
197219
}
198220
int segmentDocID = docRecord.docID() - docBase;

0 commit comments

Comments
 (0)