diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
index aecc750bd4e39..e9b9a5ea4ab9e 100644
--- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
+++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
@@ -614,6 +614,7 @@ public void apply(Settings value, Settings current, Settings previous) {
         DataStreamLifecycle.CLUSTER_LIFECYCLE_DEFAULT_ROLLOVER_SETTING,
         IndicesClusterStateService.SHARD_LOCK_RETRY_INTERVAL_SETTING,
         IndicesClusterStateService.SHARD_LOCK_RETRY_TIMEOUT_SETTING,
+        IndicesClusterStateService.CONCURRENT_SHARD_CLOSE_LIMIT,
         IngestSettings.GROK_WATCHDOG_INTERVAL,
         IngestSettings.GROK_WATCHDOG_MAX_EXECUTION_TIME,
         TDigestExecutionHint.SETTING,
diff --git a/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java b/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java
index 0e9901bc05682..9e31bc1aef9a7 100644
--- a/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java
+++ b/server/src/main/java/org/elasticsearch/indices/cluster/IndicesClusterStateService.java
@@ -116,6 +116,18 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent imple
         Setting.Property.NodeScope
     );
 
+    /**
+     * Maximum number of shards to try and close concurrently. Defaults to the smaller of {@code node.processors} and {@code 10}, but can be
+     * set to any positive integer.
+     */
+    public static final Setting<Integer> CONCURRENT_SHARD_CLOSE_LIMIT = Setting.intSetting(
+        "indices.store.max_concurrent_closing_shards",
+        settings -> Integer.toString(Math.min(10, EsExecutors.NODE_PROCESSORS_SETTING.get(settings).roundUp())),
+        1,
+        Integer.MAX_VALUE,
+        Setting.Property.NodeScope
+    );
+
     final AllocatedIndices<? extends Shard, ? extends AllocatedIndex<? extends Shard>> indicesService;
     private final ClusterService clusterService;
     private final ThreadPool threadPool;
@@ -1347,7 +1359,7 @@ enum IndexRemovalReason {
         }
     }
 
-    private static class ShardCloseExecutor implements Executor {
+    static class ShardCloseExecutor implements Executor {
 
         private final ThrottledTaskRunner throttledTaskRunner;
 
@@ -1360,8 +1372,11 @@ private static class ShardCloseExecutor implements Executor {
             // can't close the old ones down fast enough. Maybe we could block or throttle new shards starting while old shards are still
             // shutting down, given that starting new shards is already async. Since this seems unlikely in practice, we opt for the simple
             // approach here.
-            final var maxThreads = Math.max(EsExecutors.NODE_PROCESSORS_SETTING.get(settings).roundUp(), 10);
-            throttledTaskRunner = new ThrottledTaskRunner(IndicesClusterStateService.class.getCanonicalName(), maxThreads, delegate);
+            throttledTaskRunner = new ThrottledTaskRunner(
+                IndicesClusterStateService.class.getCanonicalName(),
+                CONCURRENT_SHARD_CLOSE_LIMIT.get(settings),
+                delegate
+            );
         }
 
         @Override
diff --git a/server/src/test/java/org/elasticsearch/indices/cluster/ShardCloseExecutorTests.java b/server/src/test/java/org/elasticsearch/indices/cluster/ShardCloseExecutorTests.java
new file mode 100644
index 0000000000000..d4699454a4b6e
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/indices/cluster/ShardCloseExecutorTests.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.indices.cluster;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class ShardCloseExecutorTests extends ESTestCase {
+
+    public void testThrottling() {
+        // This defaults to the number of CPUs of the machine running the tests, which could be either side of 10.
+        final var defaultProcessors = EsExecutors.NODE_PROCESSORS_SETTING.get(Settings.EMPTY).roundUp();
+        ensureThrottling(Math.min(10, defaultProcessors), Settings.EMPTY);
+
+        if (10 < defaultProcessors) {
+            ensureThrottling(
+                10,
+                Settings.builder().put(EsExecutors.NODE_PROCESSORS_SETTING.getKey(), between(10, defaultProcessors - 1)).build()
+            );
+        } // else we cannot run this check, the machine running the tests doesn't have enough CPUs
+
+        if (1 < defaultProcessors) {
+            final var fewProcessors = between(1, Math.min(10, defaultProcessors - 1));
+            ensureThrottling(fewProcessors, Settings.builder().put(EsExecutors.NODE_PROCESSORS_SETTING.getKey(), fewProcessors).build());
+        } // else we cannot run this check, the machine running the tests has fewer than 2 whole CPUs (and we already tested the 1 case)
+
+        // but in any case we can override the throttle regardless of its default value
+        final var override = between(1, defaultProcessors * 2);
+        ensureThrottling(
+            override,
+            Settings.builder().put(IndicesClusterStateService.CONCURRENT_SHARD_CLOSE_LIMIT.getKey(), override).build()
+        );
+    }
+
+    private static void ensureThrottling(int expectedLimit, Settings settings) {
+        final var tasksToRun = new ArrayList<Runnable>(expectedLimit + 1);
+        final var executor = new IndicesClusterStateService.ShardCloseExecutor(settings, tasksToRun::add);
+        final var runCount = new AtomicInteger();
+
+        // enqueue one more task than the throttling limit
+        for (int i = 0; i < expectedLimit + 1; i++) {
+            executor.execute(runCount::incrementAndGet);
+        }
+
+        // check that we submitted tasks up to the expected limit, holding back the final task behind the throttle for now
+        assertEquals(expectedLimit, tasksToRun.size());
+
+        // now execute all the tasks one by one
+        for (int i = 0; i < expectedLimit + 1; i++) {
+            assertEquals(i, runCount.get());
+            tasksToRun.get(i).run();
+            assertEquals(i + 1, runCount.get());
+
+            // executing the first task enqueues the final task
+            assertEquals(expectedLimit + 1, tasksToRun.size());
+        }
+    }
+}
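
Note for reviewers: the test above never runs the shard-close tasks itself. Its delegate executor (tasksToRun::add) merely records whatever ThrottledTaskRunner forwards to it. The assertions rely on one property: the runner forwards at most the configured limit of tasks to the delegate, and each completed task pulls exactly one queued task through. Here is a minimal sketch of that contract; the class below is hypothetical and is not the real ThrottledTaskRunner implementation.

import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.Executor;

// Hypothetical sketch of the throttling contract exercised by ShardCloseExecutorTests:
// at most `limit` tasks are forwarded to the delegate at once; completing one
// forwarded task releases the next queued task.
class ThrottledRunnerSketch implements Executor {
    private final int limit;         // maximum tasks forwarded concurrently
    private final Executor delegate; // e.g. tasksToRun::add in the test
    private final Queue<Runnable> queued = new ArrayDeque<>();
    private int forwarded;           // tasks forwarded but not yet completed

    ThrottledRunnerSketch(int limit, Executor delegate) {
        this.limit = limit;
        this.delegate = delegate;
    }

    @Override
    public synchronized void execute(Runnable task) {
        if (forwarded < limit) {
            forwarded += 1;
            forward(task);
        } else {
            queued.add(task); // held back behind the throttle
        }
    }

    private void forward(Runnable task) {
        // The wrapper runs the task, then pulls the next queued task through, so the
        // delegate only ever sees `limit` tasks up front plus one more per completion.
        delegate.execute(() -> {
            try {
                task.run();
            } finally {
                onCompletion();
            }
        });
    }

    private synchronized void onCompletion() {
        final Runnable next = queued.poll();
        if (next == null) {
            forwarded -= 1;
        } else {
            forward(next);
        }
    }
}

Under this contract, with limit = expectedLimit and delegate = tasksToRun::add, submitting expectedLimit + 1 tasks records exactly expectedLimit of them, and running the first recorded wrapper appends the held-back task to the list, which is precisely what the loop at the end of ensureThrottling asserts.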
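For completeness, a hypothetical standalone snippet showing how the new limit resolves; the value 4 is an arbitrary example, and in practice operators would set indices.store.max_concurrent_closing_shards in elasticsearch.yml rather than building Settings programmatically.

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.cluster.IndicesClusterStateService;

class ConcurrentCloseLimitExample {
    public static void main(String[] args) {
        // Override the node-scoped limit programmatically.
        Settings nodeSettings = Settings.builder()
            .put(IndicesClusterStateService.CONCURRENT_SHARD_CLOSE_LIMIT.getKey(), 4)
            .build();
        // With no override, the setting falls back to min(node.processors, 10).
        System.out.println(IndicesClusterStateService.CONCURRENT_SHARD_CLOSE_LIMIT.get(nodeSettings)); // prints 4
    }
}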