Skip to content

Commit e837ccd

Browse files
authored
[9.1] Address es819 tsdb doc values format performance bug (elastic#135535)
Backporting elastic#135505 to 9.1 branch. * The DISIAccumulator should only be used during merging. * The tmp files created by DISIAccumulator and OffsetsAccumulator shouldn't use mmap based lucene directory. Closes elastic#135340
1 parent 9bd83f7 commit e837ccd

File tree

4 files changed

+18
-3
lines changed

4 files changed

+18
-3
lines changed

docs/changelog/135505.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 135505
2+
summary: Address es819 tsdb doc values format performance bug
3+
area: Codec
4+
type: bug
5+
issues:
6+
- 135340

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
167167
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
168168
values = valuesProducer.getSortedNumeric(field);
169169
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
170-
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
170+
if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) {
171171
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
172172
}
173173
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {

server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ static boolean useDelegate(String name, IOContext ioContext) {
261261
}
262262

263263
/**
264-
* Force not using mmap if file is tmp fdt file.
264+
* Force not using mmap if file is a tmp fdt, disi or address-data file.
265265
* The tmp fdt file only gets created when flushing stored
266266
* fields to disk and index sorting is active.
267267
* <p>
@@ -284,12 +284,16 @@ static boolean useDelegate(String name, IOContext ioContext) {
284284
* mmap-ing that should still be ok even if memory is scarce.
285285
* The fdt file is large and tends to cause more page faults when memory is scarce.
286286
*
287+
* For disi and address-data files: in the es819 tsdb doc values codec, doc ids and offsets are first written to a tmp file,
288+
* then read back and written into the new segment.
289+
*
287290
* @param name The name of the file in Lucene index
288291
* @param extension The extension of the file in Lucene index
289292
* @return whether to avoid using delegate if the file is a tmp fdt, disi or address-data file.
290293
*/
291294
static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) {
292-
return extension == LuceneFilesExtensions.TMP && name.contains("fdt");
295+
return extension == LuceneFilesExtensions.TMP
296+
&& (name.contains("fdt") || name.contains("disi") || name.contains("address-data"));
293297
}
294298

295299
MMapDirectory getDelegate() {

server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ public void testPreload() throws IOException {
6969
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random())));
7070
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM));
7171
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random())));
72+
// Stored field tmp files that shouldn't preload:
7273
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random())));
7374
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random())));
75+
// Stored field tmp files that should preload:
7476
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random())));
7577
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random())));
78+
// es819 tsdb doc values tmp files that shouldn't preload:
79+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.disi__0.tmp", newIOContext(random())));
80+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.address-data__0.tmp", newIOContext(random())));
7681
MMapDirectory delegate = hybridDirectory.getDelegate();
7782
assertThat(delegate, Matchers.instanceOf(MMapDirectory.class));
7883
var func = fsDirectoryFactory.preLoadFuncMap.get(delegate);

0 commit comments

Comments
 (0)