Skip to content

Commit 1aed2bc

Browse files
authored
Address es819 tsdb doc values format performance bug (#135505)
* The DISIAccumulator should only be used during merging. * The tmp files created by DISIAccumulator and OffsetsAccumulator shouldn't use mmap based lucene directory. Closes #135340
1 parent bc7da67 commit 1aed2bc

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

docs/changelog/135505.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 135505
2+
summary: Address es819 tsdb doc values format performance bug
3+
area: Codec
4+
type: bug
5+
issues:
6+
- 135340

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
175175
meta.writeVInt(Math.toIntExact(maxOrd));
176176
meta.writeByte((byte) ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT);
177177
values = valuesProducer.getSortedNumeric(field);
178-
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
178+
if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) {
179179
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
180180
}
181181
DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance(
@@ -211,7 +211,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
211211
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
212212
values = valuesProducer.getSortedNumeric(field);
213213
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
214-
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
214+
if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) {
215215
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
216216
}
217217
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {

server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ static boolean useDelegate(String name, IOContext ioContext) {
254254
}
255255

256256
/**
257-
* Force not using mmap if file is tmp fdt file.
257+
* Force not using mmap if file is a tmp fdt, disi or address-data file.
258258
* The tmp fdt file only gets created when flushing stored
259259
* fields to disk and index sorting is active.
260260
* <p>
@@ -277,12 +277,16 @@ static boolean useDelegate(String name, IOContext ioContext) {
277277
* mmap-ing that should still be ok even if memory is scarce.
278278
* The fdt file is large and tends to cause more page faults when memory is scarce.
279279
*
280+
* For disi and address-data files, in es819 tsdb doc values codec, docids and offsets are first written to a tmp file and
281+
* then read back and written into the new segment.
282+
*
280283
* @param name The name of the file in Lucene index
281284
* @param extension The extension of the file in Lucene index
282285
* @return whether to avoid using delegate if the file is a tmp fdt, disi or address-data file.
283286
*/
284287
static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) {
285-
return extension == LuceneFilesExtensions.TMP && name.contains("fdt");
288+
return extension == LuceneFilesExtensions.TMP
289+
&& (name.contains("fdt") || name.contains("disi") || name.contains("address-data"));
286290
}
287291

288292
MMapDirectory getDelegate() {

server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,15 @@ public void testPreload() throws IOException {
6464
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random())));
6565
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM));
6666
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random())));
67+
// Stored field tmp files that shouldn't preload:
6768
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random())));
6869
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random())));
70+
// Stored field tmp files that should preload:
6971
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random())));
7072
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random())));
73+
// es819 tsdb doc values tmp files that shouldn't preload:
74+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.disi__0.tmp", newIOContext(random())));
75+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.address-data__0.tmp", newIOContext(random())));
7176
MMapDirectory delegate = hybridDirectory.getDelegate();
7277
assertThat(delegate, Matchers.instanceOf(MMapDirectory.class));
7378
var func = fsDirectoryFactory.preLoadFuncMap.get(delegate);

0 commit comments

Comments
 (0)