Skip to content

Commit b594528

Browse files
authored
Merge branch 'main' into ES91Int4VectorsScorer
2 parents 86c3bf9 + 41f6981 commit b594528

File tree

3 files changed

+42
-2
lines changed

3 files changed

+42
-2
lines changed

muted-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,9 @@ tests:
580580
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeIT
581581
method: test
582582
issue: https://github.com/elastic/elasticsearch/issues/129819
583+
- class: org.elasticsearch.index.engine.ThreadPoolMergeExecutorServiceDiskSpaceTests
584+
method: testAbortingOrRunningMergeTaskHoldsUpBudget
585+
issue: https://github.com/elastic/elasticsearch/issues/129823
583586

584587
# Examples:
585588
#

server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
4646

4747
private static final Logger Log = LogManager.getLogger(FsDirectoryFactory.class);
4848
private static final FeatureFlag MADV_RANDOM_FEATURE_FLAG = new FeatureFlag("madv_random");
49+
private static final FeatureFlag TMP_FDT_NO_MMAP_FEATURE_FLAG = new FeatureFlag("tmp_fdt_no_mmap");
4950

5051
public static final Setting<LockFactory> INDEX_LOCK_FACTOR_SETTING = new Setting<>("index.store.fs.fs_lock", "native", (s) -> {
5152
return switch (s) {
@@ -222,7 +223,7 @@ static boolean useDelegate(String name, IOContext ioContext) {
222223
}
223224

224225
final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(getExtension(name));
225-
if (extension == null || extension.shouldMmap() == false) {
226+
if (extension == null || extension.shouldMmap() == false || avoidDelegateForFdtTempFiles(name, extension)) {
226227
// Other files are either less performance-sensitive (e.g. stored field index, norms metadata)
227228
// or are large and have a random access pattern and mmap leads to page cache trashing
228229
// (e.g. stored fields and term vectors).
@@ -231,6 +232,39 @@ static boolean useDelegate(String name, IOContext ioContext) {
231232
return true;
232233
}
233234

235+
/**
236+
* Force not using mmap if file is tmp fdt file.
237+
* The tmp fdt file only gets created when flushing stored
238+
* fields to disk and index sorting is active.
239+
* <p>
240+
* In Lucene, the <code>SortingStoredFieldsConsumer</code> first
241+
* flushes stored fields to disk in tmp files in unsorted order and
242+
* uncompressed format. Then the tmp file gets a full integrity check,
243+
* then the stored values are read from the tmp in the order of
244+
* the index sorting in the segment, the order in which this happens
245+
* from the perspective of tmp fdt file is random. After that,
246+
* the tmp files are removed.
247+
* <p>
248+
* If the machine Elasticsearch runs on has sufficient memory the i/o pattern
249+
* that <code>SortingStoredFieldsConsumer</code> actually benefits from using mmap.
250+
* However, in cases when memory scarce, this pattern can cause page faults often.
251+
* Doing more harm than not using mmap.
252+
* <p>
253+
* As part of flushing stored disk when indexing sorting is active,
254+
* three tmp files are created, fdm (metadata), fdx (index) and
255+
* fdt (contains stored field data). The first two files are small and
256+
* mmap-ing that should still be ok even is memory is scarce.
257+
* The fdt file is large and tends to cause more page faults when memory is scarce.
258+
*
259+
* @param name The name of the file in Lucene index
260+
* @param extension The extension of the in Lucene index
261+
* @return whether to avoid using delegate if the file is a tmp fdt file.
262+
*/
263+
static boolean avoidDelegateForFdtTempFiles(String name, LuceneFilesExtensions extension) {
264+
// NOTE, for now gated behind feature flag to observe impact of this change in benchmarks only:
265+
return TMP_FDT_NO_MMAP_FEATURE_FLAG.isEnabled() && extension == LuceneFilesExtensions.TMP && name.contains("fdt");
266+
}
267+
234268
MMapDirectory getDelegate() {
235269
return delegate;
236270
}

server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ public void testPreload() throws IOException {
6969
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", newIOContext(random())));
7070
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM));
7171
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.tmp", newIOContext(random())));
72-
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random())));
72+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("foo.fdt__0.tmp", newIOContext(random())));
73+
assertFalse(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdt__1.tmp", newIOContext(random())));
74+
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdm__0.tmp", newIOContext(random())));
75+
assertTrue(FsDirectoryFactory.HybridDirectory.useDelegate("_0.fdx__4.tmp", newIOContext(random())));
7376
MMapDirectory delegate = hybridDirectory.getDelegate();
7477
assertThat(delegate, Matchers.instanceOf(MMapDirectory.class));
7578
var func = fsDirectoryFactory.preLoadFuncMap.get(delegate);

0 commit comments

Comments
 (0)