diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index a3ab206191f9e..d4ec647b992b5 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -46,6 +46,7 @@ public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory { private static final Logger Log = LogManager.getLogger(FsDirectoryFactory.class); private static final FeatureFlag MADV_RANDOM_FEATURE_FLAG = new FeatureFlag("madv_random"); + private static final FeatureFlag TMP_FDT_NO_MMAP_FEATURE_FLAG = new FeatureFlag("tmp_fdt_no_mmap"); public static final Setting INDEX_LOCK_FACTOR_SETTING = new Setting<>("index.store.fs.fs_lock", "native", (s) -> { return switch (s) { @@ -222,7 +223,7 @@ static boolean useDelegate(String name, IOContext ioContext) { } final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(getExtension(name)); - if (extension == null || extension.shouldMmap() == false) { + if (extension == null || extension.shouldMmap() == false || avoidDelegateForFdtTempFiles(name, extension)) { // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) // or are large and have a random access pattern and mmap leads to page cache trashing // (e.g. stored fields and term vectors). @@ -231,6 +232,39 @@ static boolean useDelegate(String name, IOContext ioContext) { return true; } + /** + * Force not using mmap if file is tmp fdt file. + * The tmp fdt file only gets created when flushing stored + * fields to disk and index sorting is active. + *

+ * In Lucene, the SortingStoredFieldsConsumer first + * flushes stored fields to disk in tmp files in unsorted order and + * uncompressed format. Then the tmp files get a full integrity check, + * and then the stored values are read from the tmp files in the order of + * the index sorting in the segment; from the perspective of the + * tmp fdt file, this read order is random. After that, + * the tmp files are removed. + *

+ * If the machine Elasticsearch runs on has sufficient memory, the i/o pattern + * of SortingStoredFieldsConsumer actually benefits from using mmap. + * However, in cases when memory is scarce, this pattern can cause frequent page faults, + * doing more harm than not using mmap. + *

+ * As part of flushing stored fields to disk when index sorting is active, + * three tmp files are created, fdm (metadata), fdx (index) and + * fdt (contains stored field data). The first two files are small and + * mmap-ing them should still be ok even if memory is scarce. + * The fdt file is large and tends to cause more page faults when memory is scarce. + * + * @param name The name of the file in Lucene index + * @param extension The extension of the file in Lucene index + * @return whether to avoid using delegate if the file is a tmp fdt file. + */ + static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) { + // NOTE, for now gated behind feature flag to observe impact of this change in benchmarks only: + return TMP_FDT_NO_MMAP_FEATURE_FLAG.isEnabled() && extension == LuceneFilesExtensions.TMP && name.contains("fdt"); + } + MMapDirectory getDelegate() { return delegate; } diff --git a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java index 167e151c519ee..bbb78053e7518 100644 --- a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java @@ -69,7 +69,10 @@ public void testPreload() throws IOException { assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random()))); assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM)); assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random()))); - assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random()))); + assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random()))); + assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random()))); +
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random()))); + assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random()))); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(MMapDirectory.class)); var func = fsDirectoryFactory.preLoadFuncMap.get(delegate);