Online prewarming service interface docs and usage in SearchService

andreidan · andreidan · commit a0d7b5a427e5 · 2025-04-09T21:02:36.000+01:00
This adds the interface for online search prewarming, together with a default
NOOP implementation. It also hooks the interface into SearchService, right
after the query phase is forked to the search thread pool.
diff --git a/server/src/main/java/org/elasticsearch/action/search/OnlinePrewarmingService.java b/server/src/main/java/org/elasticsearch/action/search/OnlinePrewarmingService.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.action.search;
+
+import org.elasticsearch.index.shard.IndexShard;
+
+/**
+ * Interface for prewarming the segments of a shard. It is designed to be called
+ * at higher volumes than the alternative, more speculative warming strategies
+ * (i.e. offline / recovery warming).
+ */
+public interface OnlinePrewarmingService {
+    OnlinePrewarmingService NOOP = (indexShard, skipPrewarming) -> {};
+
+    /**
+     * Prewarms resources (typically segments) of the supplied index shard.
+     *
+     * @param indexShard the index shard whose resources should be prewarmed
+     * @param skipPrewarming whether this prewarming call should be skipped.
+     *                       The caller decides which calls to skip and
+     *                       communicates that decision to the implementation
+     *                       through this flag.
+     */
+    void prewarm(IndexShard indexShard, boolean skipPrewarming);
+}
diff --git a/server/src/main/java/org/elasticsearch/action/search/OnlinePrewarmingServiceProvider.java b/server/src/main/java/org/elasticsearch/action/search/OnlinePrewarmingServiceProvider.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.action.search;
+
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.threadpool.ThreadPool;
+
+public interface OnlinePrewarmingServiceProvider {
+    OnlinePrewarmingServiceProvider DEFAULT = (unusedSettings, unusedThreadPool, unusedClusterService) -> OnlinePrewarmingService.NOOP;
+    /** Creates the service to use; {@link #DEFAULT} ignores all arguments and returns {@link OnlinePrewarmingService#NOOP}. */
+    OnlinePrewarmingService create(Settings settings, ThreadPool threadPool, ClusterService clusterService);
+}
diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
@@ -485,6 +485,7 @@ public void apply(Settings value, Settings current, Settings previous) {
         SearchService.CCS_VERSION_CHECK_SETTING,
         SearchService.CCS_COLLECT_TELEMETRY,
         SearchService.BATCHED_QUERY_PHASE,
+        SearchService.PREWARMING_THRESHOLD_THREADPOOL_SIZE_FACTOR_POOL_SIZE,
         MultiBucketConsumerService.MAX_BUCKET_SETTING,
         SearchService.LOW_LEVEL_CANCELLATION_SETTING,
         SearchService.MAX_OPEN_SCROLL_CONTEXT,
diff --git a/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java b/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java
@@ -9,6 +9,8 @@
 
 package org.elasticsearch.node;
 
+import org.elasticsearch.action.search.OnlinePrewarmingService;
+import org.elasticsearch.action.search.OnlinePrewarmingServiceProvider;
 import org.elasticsearch.client.internal.node.NodeClient;
 import org.elasticsearch.cluster.ClusterInfoService;
 import org.elasticsearch.cluster.InternalClusterInfoService;
@@ -123,6 +125,10 @@ SearchService newSearchService(
         ExecutorSelector executorSelector,
         Tracer tracer
     ) {
+        OnlinePrewarmingService onlinePrewarmingService = pluginsService.loadSingletonServiceProvider(
+            OnlinePrewarmingServiceProvider.class,
+            () -> OnlinePrewarmingServiceProvider.DEFAULT
+        ).create(clusterService.getSettings(), threadPool, clusterService);
         return new SearchService(
             clusterService,
             indicesService,
@@ -132,7 +138,8 @@ SearchService newSearchService(
             fetchPhase,
             circuitBreakerService,
             executorSelector,
-            tracer
+            tracer,
+            onlinePrewarmingService
         );
     }
 
diff --git a/server/src/main/java/org/elasticsearch/search/SearchService.java b/server/src/main/java/org/elasticsearch/search/SearchService.java
@@ -24,6 +24,7 @@
 import org.elasticsearch.action.ResolvedIndices;
 import org.elasticsearch.action.search.CanMatchNodeRequest;
 import org.elasticsearch.action.search.CanMatchNodeResponse;
+import org.elasticsearch.action.search.OnlinePrewarmingService;
 import org.elasticsearch.action.search.SearchShardTask;
 import org.elasticsearch.action.search.SearchType;
 import org.elasticsearch.action.support.TransportActions;
@@ -147,6 +148,7 @@
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
+import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -283,6 +285,16 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
         Property.NodeScope
     );
 
+    // Online prewarming is skipped once the search thread pool queue grows beyond this factor multiplied by
+    // the pool's maximum number of threads. Backing off gives the system a chance to catch up and keeps
+    // prewarming from taking over the network bandwidth.
+    public static final Setting<Integer> PREWARMING_THRESHOLD_THREADPOOL_SIZE_FACTOR_POOL_SIZE = Setting.intSetting(
+        "search.online_prewarming_threshold_poolsize_factor",
+        10, // default factor
+        0, // minimum; with 0, online prewarming only runs while the search thread pool has nothing queued
+        Property.NodeScope
+    );
+
     private static final boolean BATCHED_QUERY_PHASE_FEATURE_FLAG = new FeatureFlag("batched_query_phase").isEnabled();
 
     /**
@@ -317,6 +329,8 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
 
     private final FetchPhase fetchPhase;
     private final CircuitBreaker circuitBreaker;
+    private final OnlinePrewarmingService onlinePrewarmingService;
+    private final int prewarmingMaxPoolFactorThreshold;
     private volatile Executor searchExecutor;
     private volatile boolean enableQueryPhaseParallelCollection;
 
@@ -362,7 +376,8 @@ public SearchService(
         FetchPhase fetchPhase,
         CircuitBreakerService circuitBreakerService,
         ExecutorSelector executorSelector,
-        Tracer tracer
+        Tracer tracer,
+        OnlinePrewarmingService onlinePrewarmingService
     ) {
         Settings settings = clusterService.getSettings();
         this.threadPool = threadPool;
@@ -375,7 +390,7 @@ public SearchService(
         this.multiBucketConsumerService = new MultiBucketConsumerService(clusterService, settings, circuitBreaker);
         this.executorSelector = executorSelector;
         this.tracer = tracer;
-
+        this.onlinePrewarmingService = onlinePrewarmingService;
         TimeValue keepAliveInterval = KEEPALIVE_INTERVAL_SETTING.get(settings);
         setKeepAlives(DEFAULT_KEEPALIVE_SETTING.get(settings), MAX_KEEPALIVE_SETTING.get(settings));
 
@@ -427,6 +442,7 @@ public SearchService(
         memoryAccountingBufferSize = MEMORY_ACCOUNTING_BUFFER_SIZE.get(settings).getBytes();
         clusterService.getClusterSettings()
             .addSettingsUpdateConsumer(MEMORY_ACCOUNTING_BUFFER_SIZE, newValue -> this.memoryAccountingBufferSize = newValue.getBytes());
+        prewarmingMaxPoolFactorThreshold = PREWARMING_THRESHOLD_THREADPOOL_SIZE_FACTOR_POOL_SIZE.get(settings);
     }
 
     public CircuitBreaker getCircuitBreaker() {
@@ -702,6 +718,12 @@ private <T extends RefCounted> void ensureAfterSeqNoRefreshed(
         try {
             if (waitForCheckpoint <= UNASSIGNED_SEQ_NO) {
                 runAsync(executor, executable, listener);
+                // task submitted to the search pool, so prewarm the shard (threshold computed as long to avoid int overflow)
+                onlinePrewarmingService.prewarm(
+                    shard,
+                    executor instanceof ThreadPoolExecutor tpe
+                        && (((long) tpe.getMaximumPoolSize() * prewarmingMaxPoolFactorThreshold) < tpe.getQueue().size())
+                );
                 return;
             }
             if (shard.indexSettings().getRefreshInterval().getMillis() <= 0) {
@@ -778,6 +800,12 @@ private void searchReady() {
                             timeoutTask.cancel();
                         }
                         runAsync(executor, executable, listener);
+                        // task submitted to the search pool, so prewarm the shard (threshold computed as long to avoid int overflow)
+                        onlinePrewarmingService.prewarm(
+                            shard,
+                            executor instanceof ThreadPoolExecutor tpe
+                                && (((long) tpe.getMaximumPoolSize() * prewarmingMaxPoolFactorThreshold) < tpe.getQueue().size())
+                        );
                     }
                 }
             });
@@ -939,7 +967,8 @@ public void executeQueryPhase(
             freeReaderContext(readerContext.id());
             throw e;
         }
-        runAsync(getExecutor(readerContext.indexShard()), () -> {
+        Executor executor = getExecutor(readerContext.indexShard());
+        runAsync(executor, () -> {
             final ShardSearchRequest shardSearchRequest = readerContext.getShardSearchRequest(null);
             try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, false);) {
                 var opsListener = searchContext.indexShard().getSearchOperationListener();
@@ -965,6 +994,12 @@ public void executeQueryPhase(
                 throw e;
             }
         }, wrapFailureListener(listener, readerContext, markAsUsed));
+        // task submitted to the search pool, so prewarm the shard (threshold computed as long to avoid int overflow)
+        onlinePrewarmingService.prewarm(
+            readerContext.indexShard(),
+            executor instanceof ThreadPoolExecutor tpe
+                && (((long) tpe.getMaximumPoolSize() * prewarmingMaxPoolFactorThreshold) < tpe.getQueue().size())
+        );
     }
 
     /**
@@ -991,7 +1026,8 @@ public void executeQueryPhase(
         final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(shardSearchRequest));
         rewriteAndFetchShardRequest(readerContext.indexShard(), shardSearchRequest, listener.delegateFailure((l, rewritten) -> {
             // fork the execution in the search thread pool
-            runAsync(getExecutor(readerContext.indexShard()), () -> {
+            Executor executor = getExecutor(readerContext.indexShard());
+            runAsync(executor, () -> {
                 readerContext.setAggregatedDfs(request.dfs());
                 try (SearchContext searchContext = createContext(readerContext, shardSearchRequest, task, ResultsType.QUERY, true);) {
                     final QuerySearchResult queryResult;
@@ -1029,6 +1065,12 @@ public void executeQueryPhase(
                     throw e;
                 }
             }, wrapFailureListener(l, readerContext, markAsUsed));
+            // task submitted to the search pool, so prewarm the shard (threshold computed as long to avoid int overflow)
+            onlinePrewarmingService.prewarm(
+                readerContext.indexShard(),
+                executor instanceof ThreadPoolExecutor tpe
+                    && (((long) tpe.getMaximumPoolSize() * prewarmingMaxPoolFactorThreshold) < tpe.getQueue().size())
+            );
         }));
     }
 
diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -49,6 +49,7 @@
 import org.elasticsearch.action.bulk.TransportShardBulkAction;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.resync.TransportResyncReplicationAction;
+import org.elasticsearch.action.search.OnlinePrewarmingService;
 import org.elasticsearch.action.search.SearchExecutionStatsCollector;
 import org.elasticsearch.action.search.SearchPhaseController;
 import org.elasticsearch.action.search.SearchRequest;
@@ -2314,7 +2315,8 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() {
                     new FetchPhase(Collections.emptyList()),
                     new NoneCircuitBreakerService(),
                     EmptySystemIndices.INSTANCE.getExecutorSelector(),
-                    Tracer.NOOP
+                    Tracer.NOOP,
+                    OnlinePrewarmingService.NOOP
                 );
 
                 final SnapshotFilesProvider snapshotFilesProvider = new SnapshotFilesProvider(repositoriesService);
diff --git a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java
@@ -9,6 +9,7 @@
 
 package org.elasticsearch.search;
 
+import org.elasticsearch.action.search.OnlinePrewarmingService;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.core.TimeValue;
@@ -95,7 +96,8 @@ public MockSearchService(
             fetchPhase,
             circuitBreakerService,
             executorSelector,
-            tracer
+            tracer,
+            OnlinePrewarmingService.NOOP
         );
     }