elastic · DiannaHohensee · Jul 10, 2025 · Jul 1, 2025 · Jul 2, 2025 · Jul 2, 2025
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java
@@ -40,7 +40,11 @@
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
 import static org.elasticsearch.threadpool.ThreadPool.DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA;
+import static org.elasticsearch.threadpool.ThreadPool.DEFAULT_WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA;
+import static org.elasticsearch.threadpool.ThreadPool.DEFAULT_WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA;
 import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA;
 import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
 import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -234,19 +238,41 @@ public void assertValid(TestTelemetryPlugin testTelemetryPlugin, String metricSu
         }
     }
 
-    public void testWriteThreadpoolEwmaAlphaSetting() {
+    public void testWriteThreadpoolsEwmaAlphaSetting() {
         Settings settings = Settings.EMPTY;
-        var ewmaAlpha = DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA;
+        var executionEwmaAlpha = DEFAULT_INDEX_AUTOSCALING_EWMA_ALPHA;
+        var queueLatencyEwmaAlpha = DEFAULT_WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA;
+        var threadUtilizationEwmaAlpha = DEFAULT_WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA;
         if (randomBoolean()) {
-            ewmaAlpha = randomDoubleBetween(0.0, 1.0, true);
-            settings = Settings.builder().put(WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING.getKey(), ewmaAlpha).build();
+            executionEwmaAlpha = randomDoubleBetween(0.0, 1.0, true);
+            queueLatencyEwmaAlpha = randomDoubleBetween(0.0, 1.0, true);
+            threadUtilizationEwmaAlpha = randomDoubleBetween(0.0, 1.0, true);
+            settings = Settings.builder()
+                .put(WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING.getKey(), executionEwmaAlpha)
+                .put(WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA.getKey(), queueLatencyEwmaAlpha)
+                .put(WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA.getKey(), threadUtilizationEwmaAlpha)
+                .build();
         }
         var nodeName = internalCluster().startNode(settings);
         var threadPool = internalCluster().getInstance(ThreadPool.class, nodeName);
+
+        // Verify that the write thread pools all use the tracking executor.
         for (var name : List.of(ThreadPool.Names.WRITE, ThreadPool.Names.SYSTEM_WRITE, ThreadPool.Names.SYSTEM_CRITICAL_WRITE)) {
             assertThat(threadPool.executor(name), instanceOf(TaskExecutionTimeTrackingEsThreadPoolExecutor.class));
             final var executor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) threadPool.executor(name);
-            assertThat(Double.compare(executor.getEwmaAlpha(), ewmaAlpha), CoreMatchers.equalTo(0));
+            assertThat(Double.compare(executor.getExecutionEwmaAlpha(), executionEwmaAlpha), CoreMatchers.equalTo(0));
+
+            // Only the WRITE thread pool should enable further tracking.
+            if (name.equals(ThreadPool.Names.WRITE) == false) {
+                assertFalse(executor.trackingQueueLatencyEwma());
+                assertFalse(executor.trackUtilizationEwma());
+            } else {
+                // Verify that the WRITE thread pool has extra tracking enabled.
+                assertTrue(executor.trackingQueueLatencyEwma());
+                assertTrue(executor.trackUtilizationEwma());
+                assertThat(Double.compare(executor.getQueueLatencyEwmaAlpha(), queueLatencyEwmaAlpha), CoreMatchers.equalTo(0));
+                assertThat(Double.compare(executor.getPoolUtilizationEwmaAlpha(), threadUtilizationEwmaAlpha), CoreMatchers.equalTo(0));
+            }
         }
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
@@ -547,6 +547,8 @@ public void apply(Settings value, Settings current, Settings previous) {
         ThreadPool.LATE_TIME_INTERVAL_WARN_THRESHOLD_SETTING,
         ThreadPool.SLOW_SCHEDULER_TASK_WARN_THRESHOLD_SETTING,
         ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING,
+        ThreadPool.WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA,
+        ThreadPool.WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA,
         FastVectorHighlighter.SETTING_TV_HIGHLIGHT_MULTI_VALUE,
         Node.BREAKER_TYPE_KEY,
         OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING,

@@ -577,24 +577,94 @@ public void rejectedExecution(Runnable task, ThreadPoolExecutor executor) {
     }
 
     public static class TaskTrackingConfig {
-        // This is a random starting point alpha. TODO: revisit this with actual testing and/or make it configurable
-        public static final double DEFAULT_EWMA_ALPHA = 0.3;
+        public static final double DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST = 0.3;
+        public static final double DEFAULT_QUEUE_LATENCY_EWMA_ALPHA_FOR_TEST = 0.6;
+        public static final double DEFAULT_POOL_UTILIZATION_EWMA_ALPHA_FOR_TEST = 0.6;
 
         private final boolean trackExecutionTime;
         private final boolean trackOngoingTasks;
-        private final double ewmaAlpha;
+        private final boolean trackQueueLatencyEWMA;
+        private final boolean trackPoolUtilizationEWMA;
+        private final double executionTimeEwmaAlpha;
+        private final double queueLatencyEWMAAlpha;
+        private final double poolUtilizationEWMAAlpha;
+
+        public static final TaskTrackingConfig DO_NOT_TRACK = new TaskTrackingConfig(
+            false,
+            false,
+            false,
+            DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST,
+            DEFAULT_QUEUE_LATENCY_EWMA_ALPHA_FOR_TEST,
+            DEFAULT_POOL_UTILIZATION_EWMA_ALPHA_FOR_TEST
+        );
+        public static final TaskTrackingConfig DEFAULT = new TaskTrackingConfig(
+            true,
+            false,
+            false,
+            DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST,
+            DEFAULT_QUEUE_LATENCY_EWMA_ALPHA_FOR_TEST,
+            DEFAULT_POOL_UTILIZATION_EWMA_ALPHA_FOR_TEST
+        );
 
-        public static final TaskTrackingConfig DO_NOT_TRACK = new TaskTrackingConfig(false, false, DEFAULT_EWMA_ALPHA);
-        public static final TaskTrackingConfig DEFAULT = new TaskTrackingConfig(true, false, DEFAULT_EWMA_ALPHA);
+        public TaskTrackingConfig(boolean trackOngoingTasks, double executionTimeEWMAAlpha) {
+            this(
+                true,
+                trackOngoingTasks,
+                false,
+                false,
+                executionTimeEWMAAlpha,
+                DEFAULT_QUEUE_LATENCY_EWMA_ALPHA_FOR_TEST,
+                DEFAULT_POOL_UTILIZATION_EWMA_ALPHA_FOR_TEST
+            );
+        }
 
-        public TaskTrackingConfig(boolean trackOngoingTasks, double ewmaAlpha) {
-            this(true, trackOngoingTasks, ewmaAlpha);
+        /**
+         * Constructor with execution time tracking enabled, plus flags to enable queue latency and pool utilization EWMA tracking.
+         */
+        public TaskTrackingConfig(
+            boolean trackOngoingTasks,
+            boolean trackQueueLatencyEWMA,
+            boolean trackPoolUtilizationEWMA,
+            double executionTimeEWMAAlpha,
+            double queueLatencyEWMAAlpha,
+            double poolUtilizationEWMAAlpha
+        ) {
+            this(
+                true,
+                trackOngoingTasks,
+                trackQueueLatencyEWMA,
+                trackPoolUtilizationEWMA,
+                executionTimeEWMAAlpha,
+                queueLatencyEWMAAlpha,
+                poolUtilizationEWMAAlpha
+            );
         }
 
-        private TaskTrackingConfig(boolean trackExecutionTime, boolean trackOngoingTasks, double EWMAAlpha) {
+        /**
+         * @param trackExecutionTime Whether to track execution stats
+         * @param trackOngoingTasks Whether to track ongoing task execution time, not just finished tasks
+         * @param trackQueueLatencyEWMA Whether to track queue latency {@link org.elasticsearch.common.ExponentiallyWeightedMovingAverage}
+         * @param trackPoolUtilizationEWMA Whether to track the EWMA for thread pool thread utilization (fraction of thread time used).
+         * @param executionTimeEWMAAlpha The alpha seed for execution time EWMA (ExponentiallyWeightedMovingAverage).
+         * @param queueLatencyEWMAAlpha The alpha seed for task queue latency EWMA (ExponentiallyWeightedMovingAverage).
+         * @param poolUtilizationEWMAAlpha The alpha seed for pool utilization EWMA (ExponentiallyWeightedMovingAverage).
+         */
+        private TaskTrackingConfig(
+            boolean trackExecutionTime,
+            boolean trackOngoingTasks,
+            boolean trackQueueLatencyEWMA,
+            boolean trackPoolUtilizationEWMA,
+            double executionTimeEWMAAlpha,
+            double queueLatencyEWMAAlpha,
+            double poolUtilizationEWMAAlpha
+        ) {
             this.trackExecutionTime = trackExecutionTime;
             this.trackOngoingTasks = trackOngoingTasks;
-            this.ewmaAlpha = EWMAAlpha;
+            this.trackQueueLatencyEWMA = trackQueueLatencyEWMA;
+            this.trackPoolUtilizationEWMA = trackPoolUtilizationEWMA;
+            this.executionTimeEwmaAlpha = executionTimeEWMAAlpha;
+            this.queueLatencyEWMAAlpha = queueLatencyEWMAAlpha;
+            this.poolUtilizationEWMAAlpha = poolUtilizationEWMAAlpha;
         }
 
         public boolean trackExecutionTime() {
@@ -605,8 +675,24 @@ public boolean trackOngoingTasks() {
             return trackOngoingTasks;
         }
 
-        public double getEwmaAlpha() {
-            return ewmaAlpha;
+        public boolean trackQueueLatencyEWMA() {
+            return trackQueueLatencyEWMA;
+        }
+
+        public boolean trackPoolUtilizationEWMA() {
+            return trackPoolUtilizationEWMA;
+        }
+
+        public double getExecutionTimeEwmaAlpha() {
+            return executionTimeEwmaAlpha;
+        }
+
+        public double getQueueLatencyEwmaAlpha() {
+            return queueLatencyEWMAAlpha;
+        }
+
+        public double getPoolUtilizationEwmaAlpha() {
+            return poolUtilizationEWMAAlpha;
         }
     }
 

@@ -27,6 +27,7 @@
 import java.util.concurrent.RejectedExecutionHandler;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.atomic.LongAdder;
 import java.util.function.Function;
 
@@ -50,6 +51,11 @@ public final class TaskExecutionTimeTrackingEsThreadPoolExecutor extends EsThrea
     private volatile long lastPollTime = System.nanoTime();
     private volatile long lastTotalExecutionTime = 0;
     private final ExponentialBucketHistogram queueLatencyMillisHistogram = new ExponentialBucketHistogram(QUEUE_LATENCY_HISTOGRAM_BUCKETS);
+    private final boolean trackQueueLatencyEWMA;
+    private final boolean trackUtilizationEWMA;
+    private final ExponentiallyWeightedMovingAverage queueLatencyMillisEWMA;
+    private final ExponentiallyWeightedMovingAverage percentPoolUtilizationEWMA;
+    private final AtomicReference<Double> lastUtilizationValue = new AtomicReference<>(0.0);
 
     TaskExecutionTimeTrackingEsThreadPoolExecutor(
         String name,
@@ -65,9 +71,14 @@ public final class TaskExecutionTimeTrackingEsThreadPoolExecutor extends EsThrea
         TaskTrackingConfig trackingConfig
     ) {
         super(name, corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory, handler, contextHolder);
+
         this.runnableWrapper = runnableWrapper;
-        this.executionEWMA = new ExponentiallyWeightedMovingAverage(trackingConfig.getEwmaAlpha(), 0);
+        this.executionEWMA = new ExponentiallyWeightedMovingAverage(trackingConfig.getExecutionTimeEwmaAlpha(), 0);
         this.trackOngoingTasks = trackingConfig.trackOngoingTasks();
+        this.trackQueueLatencyEWMA = trackingConfig.trackQueueLatencyEWMA();
+        this.queueLatencyMillisEWMA = new ExponentiallyWeightedMovingAverage(trackingConfig.getQueueLatencyEwmaAlpha(), 0);
+        this.trackUtilizationEWMA = trackingConfig.trackPoolUtilizationEWMA();
+        this.percentPoolUtilizationEWMA = new ExponentiallyWeightedMovingAverage(trackingConfig.getPoolUtilizationEwmaAlpha(), 0);
     }
 
     public List<Instrument> setupMetrics(MeterRegistry meterRegistry, String threadPoolName) {
@@ -136,6 +147,20 @@ public int getCurrentQueueSize() {
         return getQueue().size();
     }
 
+    public double getPercentPoolUtilizationEWMA() {
+        if (trackUtilizationEWMA == false) {
+            return 0;
+        }
+        return this.percentPoolUtilizationEWMA.getAverage();
+    }
+
+    public double getQueuedTaskLatencyMillisEWMA() {
+        if (trackQueueLatencyEWMA == false) {
+            return 0;
+        }
+        return queueLatencyMillisEWMA.getAverage();
+    }
+
     /**
      * Returns the fraction of the maximum possible thread time that was actually used since the last time
      * this method was called.
@@ -153,20 +178,41 @@ public double pollUtilization() {
 
         lastTotalExecutionTime = currentTotalExecutionTimeNanos;
         lastPollTime = currentPollTimeNanos;
+
+        if (trackUtilizationEWMA) {
+            percentPoolUtilizationEWMA.addValue(utilizationSinceLastPoll);
+            // Test-only tracking: runs inside an assert, so it is skipped when assertions are disabled.
+            assert setUtilizationSinceLastPoll(utilizationSinceLastPoll);
+        }
+
         return utilizationSinceLastPoll;
     }
 
+    // Test only: records the latest utilization value; always returns true so it can be called from an assert.
+    private boolean setUtilizationSinceLastPoll(double utilizationSinceLastPoll) {
+        lastUtilizationValue.set(utilizationSinceLastPoll);
+        return true;
+    }
+
     @Override
     protected void beforeExecute(Thread t, Runnable r) {
         if (trackOngoingTasks) {
             ongoingTasks.put(r, System.nanoTime());
         }
+
         assert super.unwrap(r) instanceof TimedRunnable : "expected only TimedRunnables in queue";
         final TimedRunnable timedRunnable = (TimedRunnable) super.unwrap(r);
         timedRunnable.beforeExecute();
         final long taskQueueLatency = timedRunnable.getQueueTimeNanos();
         assert taskQueueLatency >= 0;
-        queueLatencyMillisHistogram.addObservation(TimeUnit.NANOSECONDS.toMillis(taskQueueLatency));
+        var queueLatencyMillis = TimeUnit.NANOSECONDS.toMillis(taskQueueLatency);
+        queueLatencyMillisHistogram.addObservation(queueLatencyMillis);
+
+        if (trackQueueLatencyEWMA) {
+            if (queueLatencyMillis > 0) {
+                queueLatencyMillisEWMA.addValue(queueLatencyMillis);
+            }
+        }
     }
 
     @Override
@@ -208,6 +254,12 @@ protected void appendThreadPoolExecutorDetails(StringBuilder sb) {
             .append("total task execution time = ")
             .append(TimeValue.timeValueNanos(getTotalTaskExecutionTime()))
             .append(", ");
+        if (trackQueueLatencyEWMA) {
+            sb.append("task queue EWMA = ").append(TimeValue.timeValueMillis((long) getQueuedTaskLatencyMillisEWMA())).append(", ");
+        }
+        if (trackUtilizationEWMA) {
+            sb.append("thread pool utilization percentage EWMA = ").append(getPercentPoolUtilizationEWMA()).append(", ");
+        }
     }
 
     /**
@@ -222,7 +274,27 @@ public Map<Runnable, Long> getOngoingTasks() {
     }
 
     // Used for testing
-    public double getEwmaAlpha() {
+    public double getExecutionEwmaAlpha() {
         return executionEWMA.getAlpha();
     }
+
+    // Used for testing
+    public double getQueueLatencyEwmaAlpha() {
+        return queueLatencyMillisEWMA.getAlpha();
+    }
+
+    // Used for testing
+    public double getPoolUtilizationEwmaAlpha() {
+        return percentPoolUtilizationEWMA.getAlpha();
+    }
+
+    // Used for testing
+    public boolean trackingQueueLatencyEwma() {
+        return trackQueueLatencyEWMA;
+    }
+
+    // Used for testing
+    public boolean trackUtilizationEwma() {
+        return trackUtilizationEWMA;
+    }
 }
@@ -21,6 +21,8 @@
 
 import static java.util.Collections.unmodifiableMap;
 import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA;
+import static org.elasticsearch.threadpool.ThreadPool.WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA;
 import static org.elasticsearch.threadpool.ThreadPool.searchAutoscalingEWMA;
 
 public class DefaultBuiltInExecutorBuilders implements BuiltInExecutorBuilders {
@@ -32,6 +34,8 @@ public Map<String, ExecutorBuilder> getBuilders(Settings settings, int allocated
         final int halfProcMaxAt10 = ThreadPool.halfAllocatedProcessorsMaxTen(allocatedProcessors);
         final int genericThreadPoolMax = ThreadPool.boundedBy(4 * allocatedProcessors, 128, 512);
         final double indexAutoscalingEWMA = WRITE_THREAD_POOLS_EWMA_ALPHA_SETTING.get(settings);
+        final double queueLatencyEWMAAlpha = WRITE_THREAD_POOL_QUEUE_LATENCY_EWMA_ALPHA.get(settings);
+        final double threadUtilizationEWMAAlpha = WRITE_THREAD_POOL_THREAD_UTILIZATION_EWMA_ALPHA.get(settings);
 
         Map<String, ExecutorBuilder> result = new HashMap<>();
         result.put(
@@ -55,7 +59,14 @@ public Map<String, ExecutorBuilder> getBuilders(Settings settings, int allocated
                 ThreadPool.Names.WRITE,
                 allocatedProcessors,
                 10000,
-                new EsExecutors.TaskTrackingConfig(true, indexAutoscalingEWMA)
+                new EsExecutors.TaskTrackingConfig(
+                    true,
+                    true,
+                    true,
+                    indexAutoscalingEWMA,
+                    queueLatencyEWMAAlpha,
+                    threadUtilizationEWMAAlpha
+                )
             )
         );
         int searchOrGetThreadPoolSize = ThreadPool.searchOrGetThreadPoolSize(allocatedProcessors);