Skip to content

Commit a89cdb4

Browse files
authored
[8.19] Address es819 tsdb doc values format performance bug (#135536)
Backporting #135505 to 8.19 branch. * The DISIAccumulator should only be used during merging. * The tmp files created by DISIAccumulator and OffsetsAccumulator shouldn't use mmap based lucene directory. Closes #135340
1 parent 16e1799 commit a89cdb4

File tree

4 files changed

+18
-3
lines changed

4 files changed

+18
-3
lines changed

docs/changelog/135505.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 135505
2+
summary: Address es819 tsdb doc values format performance bug
3+
area: Codec
4+
type: bug
5+
issues:
6+
- 135340

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
156156
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
157157
values = valuesProducer.getSortedNumeric(field);
158158
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
159-
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
159+
if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) {
160160
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
161161
}
162162
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {

server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ static boolean useDelegate(String name, IOContext ioContext) {
215215
}
216216

217217
/**
218-
* Force not using mmap if file is tmp fdt file.
218+
* Force not using mmap if file is a tmp fdt, disi or address-data file.
219219
* The tmp fdt file only gets created when flushing stored
220220
* fields to disk and index sorting is active.
221221
* <p>
@@ -238,12 +238,16 @@ static boolean useDelegate(String name, IOContext ioContext) {
238238
* mmap-ing that should still be ok even if memory is scarce.
239239
* The fdt file is large and tends to cause more page faults when memory is scarce.
240240
*
241+
* For disi and address-data files, in es819 tsdb doc values codec, docids and offsets are first written to a tmp file and
242+
* then read back and written into the new segment.
243+
*
241244
* @param name The name of the file in Lucene index
242245
* @param extension The extension of the file in Lucene index
243246
* @return whether to avoid using delegate if the file is a tmp fdt, disi or address-data file.
244247
*/
245248
static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) {
246-
return extension == LuceneFilesExtensions.TMP && name.contains("fdt");
249+
return extension == LuceneFilesExtensions.TMP
250+
&& (name.contains("fdt") || name.contains("disi") || name.contains("address-data"));
247251
}
248252

249253
MMapDirectory getDelegate() {

server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,15 @@ public void testPreload() throws IOException {
6161
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random())));
6262
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM));
6363
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random())));
64+
// Stored field tmp files that shouldn't preload:
6465
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random())));
6566
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random())));
67+
// Stored field tmp files that should preload:
6668
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random())));
6769
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random())));
70+
// es819 tsdb doc values tmp files that shouldn't preload:
71+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.disi__0.tmp", newIOContext(random())));
72+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.address-data__0.tmp", newIOContext(random())));
6873
MMapDirectory delegate = hybridDirectory.getDelegate();
6974
assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class));
7075
FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;

0 commit comments

Comments
 (0)