PR comments

ldematte · ldematte · commit e01b764b7087 · 2025-10-08T12:34:28.000+02:00
diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle
@@ -60,14 +60,21 @@ tasks.register("checkVec", JavaExec) {
   }
   def asyncProfilerPath = System.getProperty("asyncProfiler.path", null)
   if (asyncProfilerPath != null) {
+    def asyncProfilerEvent = System.getProperty("asyncProfiler.event", "cpu")
     if (OS.current().equals(OS.MAC)) {
       def asyncProfilerAgent = "${asyncProfilerPath}/lib/libasyncProfiler.dylib"
       println "Using async-profiler agent ${asyncProfilerAgent}"
-      jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=cpu,interval=10ms,file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr"
+
+      // The macOS implementation of async-profiler does not support combining wall clock profiling with another event.
+      // Wall clock times can be obtained separately by invoking this task with `-DasyncProfiler.event=wall`
+      jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=${asyncProfilerEvent},interval=10ms,file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr"
     } else if (OS.current().equals(OS.LINUX)) {
+      // Linux implementation of async-profiler uses perf_event, which allows wall clock profiling with another event (cpu)
+      def additionalWallInterval = asyncProfilerEvent.equals("cpu") ? ",wall=50ms" : ""
+
       def asyncProfilerAgent = "${asyncProfilerPath}/lib/libasyncProfiler.so"
       println "Using async-profiler agent ${asyncProfilerAgent}"
-      jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=cpu,interval=10ms,wall=50ms,file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr"
+      jvmArgs "-agentpath:${asyncProfilerAgent}=start,event=${asyncProfilerEvent},interval=10ms${additionalWallInterval},file=${layout.buildDirectory.asFile.get()}/tmp/elasticsearch-0_%t_%p.jfr"
     } else {
       println "Ignoring 'asyncProfiler.path': not available on ${OS.current()}";
     }
diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java
@@ -9,10 +9,12 @@
 
 package org.elasticsearch.test.knn;
 
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.VectorEncoding;
 import org.apache.lucene.index.VectorSimilarityFunction;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.core.PathUtils;
+import org.elasticsearch.monitor.jvm.JvmInfo;
 import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.ToXContentObject;
@@ -54,7 +56,8 @@ record CmdLineArgs(
     int dimensions,
     boolean earlyTermination,
     KnnIndexTester.MergePolicyType mergePolicy,
-    double writerBufferSizeInMb
+    double writerBufferSizeInMb,
+    int writerMaxBufferedDocs
 ) implements ToXContentObject {
 
     static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors");
@@ -83,9 +86,15 @@ record CmdLineArgs(
     static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity");
     static final ParseField SEED_FIELD = new ParseField("seed");
     static final ParseField MERGE_POLICY_FIELD = new ParseField("merge_policy");
-    static final ParseField WRITER_BUFFER_FIELD = new ParseField("writer_buffer_mb");
+    static final ParseField WRITER_BUFFER_MB_FIELD = new ParseField("writer_buffer_mb");
+    static final ParseField WRITER_BUFFER_DOCS_FIELD = new ParseField("writer_buffer_docs");
 
-    static final double DEFAULT_WRITER_BUFFER_MB = 128;
+    /** In ES, the default writer buffer size is 10% of the heap space
+     * (see {@code IndexingMemoryController.INDEX_BUFFER_SIZE_SETTING}).
+     * We configure the Java heap size for this tool in {@code build.gradle}; currently we default to 16GB, so in that case
+     * the buffer size would be 1.6GB.
+     */
+    static final double DEFAULT_WRITER_BUFFER_MB = (JvmInfo.jvmInfo().getMem().getHeapMax().getBytes() / (1024.0 * 1024.0)) * 0.1;
 
     static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
         Builder builder = PARSER.apply(parser, null);
@@ -121,7 +130,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
         PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD);
         PARSER.declareLong(Builder::setSeed, SEED_FIELD);
         PARSER.declareString(Builder::setMergePolicy, MERGE_POLICY_FIELD);
-        PARSER.declareDouble(Builder::setWriterBufferMb, WRITER_BUFFER_FIELD);
+        PARSER.declareDouble(Builder::setWriterBufferMb, WRITER_BUFFER_MB_FIELD);
+        PARSER.declareInt(Builder::setWriterMaxBufferedDocs, WRITER_BUFFER_DOCS_FIELD);
     }
 
     @Override
@@ -157,6 +167,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
         builder.field(EARLY_TERMINATION_FIELD.getPreferredName(), earlyTermination);
         builder.field(FILTER_SELECTIVITY_FIELD.getPreferredName(), filterSelectivity);
         builder.field(SEED_FIELD.getPreferredName(), seed);
+        builder.field(WRITER_BUFFER_MB_FIELD.getPreferredName(), writerBufferSizeInMb);
+        builder.field(WRITER_BUFFER_DOCS_FIELD.getPreferredName(), writerMaxBufferedDocs);
         return builder.endObject();
     }
 
@@ -193,6 +205,12 @@ static class Builder {
         private KnnIndexTester.MergePolicyType mergePolicy = null;
         private double writerBufferSizeInMb = DEFAULT_WRITER_BUFFER_MB;
 
+        /**
+         * Elasticsearch does not set this explicitly, and in Lucene this setting is
+         * disabled by default (writer flushes by RAM usage).
+         */
+        private int writerMaxBufferedDocs = IndexWriterConfig.DISABLE_AUTO_FLUSH;
+
         public Builder setDocVectors(List<String> docVectors) {
             if (docVectors == null || docVectors.isEmpty()) {
                 throw new IllegalArgumentException("Document vectors path must be provided");
@@ -327,6 +345,11 @@ public Builder setWriterBufferMb(double writerBufferSizeInMb) {
             return this;
         }
 
+        public Builder setWriterMaxBufferedDocs(int writerMaxBufferedDocs) {
+            this.writerMaxBufferedDocs = writerMaxBufferedDocs;
+            return this;
+        }
+
         public CmdLineArgs build() {
             if (docVectors == null) {
                 throw new IllegalArgumentException("Document vectors path must be provided");
@@ -362,7 +385,8 @@ public CmdLineArgs build() {
                 dimensions,
                 earlyTermination,
                 mergePolicy,
-                writerBufferSizeInMb
+                writerBufferSizeInMb,
+                writerMaxBufferedDocs
             );
         }
     }
diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java
@@ -241,7 +241,8 @@ public static void main(String[] args) throws Exception {
                     cmdLineArgs.vectorSpace(),
                     cmdLineArgs.numDocs(),
                     mergePolicy,
-                    cmdLineArgs.writerBufferSizeInMb()
+                    cmdLineArgs.writerBufferSizeInMb(),
+                    cmdLineArgs.writerMaxBufferedDocs()
                 );
                 if (cmdLineArgs.reindex() == false && Files.exists(indexPath) == false) {
                     throw new IllegalArgumentException("Index path does not exist: " + indexPath);
diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java
@@ -74,6 +74,7 @@ class KnnIndexer {
     private final int numIndexThreads;
     private final MergePolicy mergePolicy;
     private final double writerBufferSizeInMb;
+    private final int writerMaxBufferedDocs;
 
     KnnIndexer(
         List<Path> docsPath,
@@ -85,7 +86,8 @@ class KnnIndexer {
         VectorSimilarityFunction similarityFunction,
         int numDocs,
         MergePolicy mergePolicy,
-        double writerBufferSizeInMb
+        double writerBufferSizeInMb,
+        int writerMaxBufferedDocs
     ) {
         this.docsPath = docsPath;
         this.indexPath = indexPath;
@@ -97,12 +99,13 @@ class KnnIndexer {
         this.numDocs = numDocs;
         this.mergePolicy = mergePolicy;
         this.writerBufferSizeInMb = writerBufferSizeInMb;
+        this.writerMaxBufferedDocs = writerMaxBufferedDocs;
     }
 
     void createIndex(KnnIndexTester.Results result) throws IOException, InterruptedException, ExecutionException {
         IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE);
         iwc.setCodec(codec);
-        iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+        iwc.setMaxBufferedDocs(writerMaxBufferedDocs);
         iwc.setRAMBufferSizeMB(writerBufferSizeInMb);
         iwc.setUseCompoundFile(false);
         if (mergePolicy != null) {
diff --git a/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java b/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java
@@ -23,6 +23,7 @@
 import org.elasticsearch.common.unit.MemorySizeValue;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.core.UpdateForV10;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.codec.CodecProvider;
@@ -131,6 +132,7 @@ public Supplier<RetentionLeases> retentionLeasesSupplier() {
      * TODO: Remove in 9.0
      */
     @Deprecated
+    @UpdateForV10(owner = UpdateForV10.Owner.DISTRIBUTED_INDEXING)
     public static final Setting<Boolean> INDEX_OPTIMIZE_AUTO_GENERATED_IDS = Setting.boolSetting(
         "index.optimize_auto_generated_id",
         true,
@@ -213,6 +215,7 @@ public EngineConfig(
         // Add an escape hatch in case this change proves problematic - it used
         // to be a fixed amound of RAM: 256 MB.
         // TODO: Remove this escape hatch in 8.x
+        @UpdateForV10(owner = UpdateForV10.Owner.DISTRIBUTED_INDEXING)
         final String escapeHatchProperty = "es.index.memory.max_index_buffer_size";
         String maxBufferSize = System.getProperty(escapeHatchProperty);
         if (maxBufferSize != null) {