diff --git a/docs/changelog/130308.yaml b/docs/changelog/130308.yaml new file mode 100644 index 0000000000000..f9386593744f1 --- /dev/null +++ b/docs/changelog/130308.yaml @@ -0,0 +1,5 @@ +pr: 130308 +summary: Force niofs for fdt tmp file read access when flushing stored fields +area: Logs +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index e76060b6c0dc8..4816bea1ba253 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -177,7 +177,7 @@ static boolean useDelegate(String name, IOContext ioContext) { } final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(getExtension(name)); - if (extension == null || extension.shouldMmap() == false) { + if (extension == null || extension.shouldMmap() == false || avoidDelegateForFdtTempFiles(name, extension)) { // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) // or are large and have a random access pattern and mmap leads to page cache trashing // (e.g. stored fields and term vectors). @@ -186,6 +186,38 @@ static boolean useDelegate(String name, IOContext ioContext) { return true; } + /** + * Force not using mmap if file is tmp fdt file. + * The tmp fdt file only gets created when flushing stored + * fields to disk and index sorting is active. + *
+ * In Lucene, the SortingStoredFieldsConsumer first
+ * flushes stored fields to disk in tmp files in unsorted order and
+ * uncompressed format. Then the tmp file gets a full integrity check,
+ * then the stored values are read from the tmp file in the order of
+ * the index sorting in the segment. From the perspective of the tmp fdt
+ * file, the order in which this happens is random. After that,
+ * the tmp files are removed.
+ *
+ * If the machine Elasticsearch runs on has sufficient memory, the i/o pattern
+ * of SortingStoredFieldsConsumer actually benefits from using mmap.
+ * However, in cases when memory is scarce, this pattern can cause frequent page faults,
+ * doing more harm than not using mmap.
+ *
+ * As part of flushing stored fields to disk when index sorting is active, + * three tmp files are created, fdm (metadata), fdx (index) and + * fdt (contains stored field data). The first two files are small and + * mmap-ing them should still be ok even if memory is scarce. + * The fdt file is large and tends to cause more page faults when memory is scarce. + * + * @param name The name of the file in Lucene index + * @param extension The extension of the file in Lucene index + * @return whether to avoid using delegate if the file is a tmp fdt file. + */ + static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) { + return extension == LuceneFilesExtensions.TMP && name.contains("fdt"); + } + MMapDirectory getDelegate() { return delegate; } diff --git a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java index 46e0d0d31000e..e49b72f573900 100644 --- a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java @@ -61,7 +61,10 @@ public void testPreload() throws IOException { assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random()))); assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM)); assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random()))); - assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random()))); + assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random()))); + assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random()))); + assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random()))); + 
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random()))); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;