Skip to content

Commit c295cac

Browse files
authored
Merge branch 'main' into mtv8
2 parents 26b96c5 + dc743a9 commit c295cac

File tree

11 files changed

+198
-63
lines changed

11 files changed

+198
-63
lines changed

server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java

Lines changed: 67 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,6 @@
1515
import org.elasticsearch.action.admin.indices.stats.IndicesStatsAction;
1616
import org.elasticsearch.action.admin.indices.stats.ShardStats;
1717
import org.elasticsearch.action.admin.indices.stats.TransportIndicesStatsAction;
18-
import org.elasticsearch.cluster.ClusterInfoService;
19-
import org.elasticsearch.cluster.ClusterInfoServiceUtils;
20-
import org.elasticsearch.cluster.InternalClusterInfoService;
2118
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
2219
import org.elasticsearch.cluster.metadata.IndexMetadata;
2320
import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -26,6 +23,7 @@
2623
import org.elasticsearch.cluster.routing.ShardRouting;
2724
import org.elasticsearch.cluster.routing.UnassignedInfo;
2825
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
26+
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics;
2927
import org.elasticsearch.cluster.service.ClusterService;
3028
import org.elasticsearch.common.settings.Settings;
3129
import org.elasticsearch.common.util.CollectionUtils;
@@ -36,6 +34,8 @@
3634
import org.elasticsearch.index.shard.ShardPath;
3735
import org.elasticsearch.index.store.StoreStats;
3836
import org.elasticsearch.plugins.Plugin;
37+
import org.elasticsearch.plugins.PluginsService;
38+
import org.elasticsearch.telemetry.TestTelemetryPlugin;
3939
import org.elasticsearch.test.ClusterServiceUtils;
4040
import org.elasticsearch.test.ESIntegTestCase;
4141
import org.elasticsearch.test.transport.MockTransportService;
@@ -45,19 +45,30 @@
4545
import java.nio.file.Path;
4646
import java.util.ArrayList;
4747
import java.util.Collection;
48+
import java.util.HashMap;
4849
import java.util.List;
4950
import java.util.Map;
51+
import java.util.concurrent.CountDownLatch;
5052

53+
import static java.util.stream.IntStream.range;
5154
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
5255
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
5356
import static org.elasticsearch.cluster.routing.ShardMovementWriteLoadSimulator.calculateUtilizationForWriteLoad;
57+
import static org.hamcrest.Matchers.everyItem;
58+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
59+
import static org.hamcrest.Matchers.hasSize;
5460

5561
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
5662
public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
5763

5864
/**
 * Returns the mock plugins for this test: {@code super.nodePlugins()} extended with
 * {@link MockTransportService.TestPlugin} and {@link TestTelemetryPlugin}.
 *
 * NOTE(review): this span is a diff hunk. The line under the {@code 60-} gutter marker is the
 * removed single-plugin {@code return}; the lines under {@code 67+}..{@code 71+} are its
 * replacement, which additionally registers the telemetry plugin.
 */
@Override
65+
@SuppressWarnings("unchecked")
5966
protected Collection<Class<? extends Plugin>> getMockPlugins() {
60-
return CollectionUtils.appendToCopy(super.nodePlugins(), MockTransportService.TestPlugin.class);
67+
return CollectionUtils.appendToCopyNoNullElements(
68+
super.nodePlugins(),
69+
MockTransportService.TestPlugin.class,
70+
TestTelemetryPlugin.class
71+
);
6172
}
6273

6374
/**
@@ -236,11 +247,7 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
236247
*/
237248

238249
logger.info("---> Refreshing the cluster info to pull in the dummy thread pool stats with a hot-spotting node");
239-
final InternalClusterInfoService clusterInfoService = asInstanceOf(
240-
InternalClusterInfoService.class,
241-
internalCluster().getInstance(ClusterInfoService.class, masterName)
242-
);
243-
ClusterInfoServiceUtils.refresh(clusterInfoService);
250+
refreshClusterInfo();
244251

245252
logger.info(
246253
"---> Update the filter to exclude " + firstDataNodeName + " so that shards will be reassigned away to the other nodes"
@@ -263,6 +270,57 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
263270
}));
264271
}
265272

273+
/**
 * Checks that each node publishes the write-load decider max queue latency gauge after a cluster
 * info refresh, and that the value for a node whose write thread pool was blocked is at least as
 * large as the time the pool stayed blocked.
 */
public void testMaxQueueLatencyMetricIsPublished() {
274+
// Start three nodes with the write load decider enabled.
final Settings settings = Settings.builder()
275+
.put(
276+
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
277+
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
278+
)
279+
.build();
280+
final var dataNodes = internalCluster().startNodes(3, settings);
281+
282+
// Refresh cluster info (should trigger polling)
283+
refreshClusterInfo();
284+
285+
// Baseline: one measurement per started node, none of them negative.
Map<String, Long> mostRecentQueueLatencyMetrics = getMostRecentQueueLatencyMetrics(dataNodes);
286+
assertThat(mostRecentQueueLatencyMetrics.keySet(), hasSize(dataNodes.size()));
287+
assertThat(mostRecentQueueLatencyMetrics.values(), everyItem(greaterThanOrEqualTo(0L)));
288+
289+
final String dataNodeToDelay = randomFrom(dataNodes);
290+
final ThreadPool threadPoolToDelay = internalCluster().getInstance(ThreadPool.class, dataNodeToDelay);
291+
292+
// Fill the write thread pool and block a task for some time: one task more than the pool's max
// size is submitted, and every task waits on the same latch.
293+
final int writeThreadPoolSize = threadPoolToDelay.info(ThreadPool.Names.WRITE).getMax();
294+
final var latch = new CountDownLatch(1);
295+
final var writeThreadPool = threadPoolToDelay.executor(ThreadPool.Names.WRITE);
296+
range(0, writeThreadPoolSize + 1).forEach(i -> writeThreadPool.execute(() -> safeAwait(latch)));
297+
// Keep the tasks blocked for a random 100-200 ms; this is the lower bound asserted below.
final long delayMillis = randomIntBetween(100, 200);
298+
safeSleep(delayMillis);
299+
// Unblock the pool
300+
latch.countDown();
301+
302+
// Refresh again and re-read the gauges; the delayed node must report at least the blocked time.
refreshClusterInfo();
303+
mostRecentQueueLatencyMetrics = getMostRecentQueueLatencyMetrics(dataNodes);
304+
assertThat(mostRecentQueueLatencyMetrics.keySet(), hasSize(dataNodes.size()));
305+
assertThat(mostRecentQueueLatencyMetrics.get(dataNodeToDelay), greaterThanOrEqualTo(delayMillis));
306+
}
307+
308+
/**
 * Reads the latest {@link DesiredBalanceMetrics#WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE} gauge
 * measurement from the {@link TestTelemetryPlugin} of each given node.
 *
 * @param dataNodes names of the nodes to query
 * @return map from node name to the last recorded gauge value; a node with no recorded
 *         measurement for the gauge is left out of the map
 */
private static Map<String, Long> getMostRecentQueueLatencyMetrics(List<String> dataNodes) {
309+
final Map<String, Long> measurements = new HashMap<>();
310+
for (String nodeName : dataNodes) {
311+
PluginsService pluginsService = internalCluster().getInstance(PluginsService.class, nodeName);
312+
// Throws if the node has no TestTelemetryPlugin installed.
final TestTelemetryPlugin telemetryPlugin = pluginsService.filterPlugins(TestTelemetryPlugin.class).findFirst().orElseThrow();
313+
// NOTE(review): presumably makes the plugin poll its registered gauges - confirm against TestTelemetryPlugin.
telemetryPlugin.collect();
314+
final var maxLatencyValues = telemetryPlugin.getLongGaugeMeasurement(
315+
DesiredBalanceMetrics.WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE
316+
);
317+
// Only the last measurement in the list is kept.
if (maxLatencyValues.isEmpty() == false) {
318+
measurements.put(nodeName, maxLatencyValues.getLast().getLong());
319+
}
320+
}
321+
return measurements;
322+
}
323+
266324
/**
267325
* Verifies that the {@link RoutingNodes} shows that the expected portion of an index's shards are assigned to each node.
268326
*/

server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,30 @@
99

1010
package org.elasticsearch.action.admin.cluster.node.usage;
1111

12-
import org.apache.logging.log4j.LogManager;
13-
import org.apache.logging.log4j.Logger;
1412
import org.elasticsearch.action.ActionType;
1513
import org.elasticsearch.action.FailedNodeException;
1614
import org.elasticsearch.action.support.ActionFilters;
1715
import org.elasticsearch.action.support.nodes.TransportNodesAction;
1816
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
1917
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools.ThreadPoolUsageStats;
2018
import org.elasticsearch.cluster.node.DiscoveryNode;
19+
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics;
2120
import org.elasticsearch.cluster.service.ClusterService;
2221
import org.elasticsearch.common.io.stream.StreamInput;
2322
import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
2423
import org.elasticsearch.injection.guice.Inject;
2524
import org.elasticsearch.tasks.Task;
25+
import org.elasticsearch.telemetry.metric.LongWithAttributes;
2626
import org.elasticsearch.threadpool.ThreadPool;
2727
import org.elasticsearch.transport.TransportService;
2828

2929
import java.io.IOException;
30+
import java.util.Collection;
3031
import java.util.HashMap;
3132
import java.util.List;
3233
import java.util.Map;
34+
import java.util.Set;
35+
import java.util.concurrent.atomic.AtomicLong;
3336

3437
/**
3538
* Collects some thread pool stats from each data node for purposes of shard allocation balancing. The specific stats are defined in
@@ -42,20 +45,21 @@ public class TransportNodeUsageStatsForThreadPoolsAction extends TransportNodesA
4245
NodeUsageStatsForThreadPoolsAction.NodeResponse,
4346
Void> {
4447

45-
private static final Logger logger = LogManager.getLogger(TransportNodeUsageStatsForThreadPoolsAction.class);
46-
4748
public static final String NAME = "internal:monitor/thread_pool/stats";
4849
public static final ActionType<NodeUsageStatsForThreadPoolsAction.Response> TYPE = new ActionType<>(NAME);
50+
/**
 * Sentinel stored in {@code lastMaxQueueLatencyMillis} when there is no max queue latency value
 * to publish. Typed {@code long} to match the {@code AtomicLong} and {@code long} values it is
 * stored in and compared against.
 */
private static final long NO_VALUE = -1L;
4951

5052
private final ThreadPool threadPool;
5153
private final ClusterService clusterService;
54+
private final AtomicLong lastMaxQueueLatencyMillis = new AtomicLong(NO_VALUE);
5255

5356
@Inject
5457
public TransportNodeUsageStatsForThreadPoolsAction(
5558
ThreadPool threadPool,
5659
ClusterService clusterService,
5760
TransportService transportService,
58-
ActionFilters actionFilters
61+
ActionFilters actionFilters,
62+
DesiredBalanceMetrics desiredBalanceMetrics
5963
) {
6064
super(
6165
NAME,
@@ -67,6 +71,7 @@ public TransportNodeUsageStatsForThreadPoolsAction(
6771
);
6872
this.threadPool = threadPool;
6973
this.clusterService = clusterService;
74+
desiredBalanceMetrics.registerWriteLoadDeciderMaxLatencyGauge(this::getMaxQueueLatencyMetric);
7075
}
7176

7277
@Override
@@ -99,15 +104,17 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation(
99104
assert writeExecutor instanceof TaskExecutionTimeTrackingEsThreadPoolExecutor;
100105
var trackingForWriteExecutor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) writeExecutor;
101106

107+
long maxQueueLatencyMillis = Math.max(
108+
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(),
109+
trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis()
110+
);
111+
lastMaxQueueLatencyMillis.set(maxQueueLatencyMillis);
102112
ThreadPoolUsageStats threadPoolUsageStats = new ThreadPoolUsageStats(
103113
trackingForWriteExecutor.getMaximumPoolSize(),
104114
(float) trackingForWriteExecutor.pollUtilization(
105115
TaskExecutionTimeTrackingEsThreadPoolExecutor.UtilizationTrackingPurpose.ALLOCATION
106116
),
107-
Math.max(
108-
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(),
109-
trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis()
110-
)
117+
maxQueueLatencyMillis
111118
);
112119

113120
Map<String, ThreadPoolUsageStats> perThreadPool = new HashMap<>();
@@ -117,4 +124,13 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation(
117124
new NodeUsageStatsForThreadPools(localNode.getId(), perThreadPool)
118125
);
119126
}
127+
128+
/**
 * Supplier handed to {@code registerWriteLoadDeciderMaxLatencyGauge} in the constructor.
 * Atomically takes the value held in {@code lastMaxQueueLatencyMillis} and resets it to
 * {@code NO_VALUE}, so a given value is returned by at most one call.
 *
 * @return a single {@link LongWithAttributes} wrapping the taken value, or an empty collection
 *         when the field held {@code NO_VALUE}
 */
private Collection<LongWithAttributes> getMaxQueueLatencyMetric() {
129+
// Read and clear in one step.
long maxQueueLatencyValue = lastMaxQueueLatencyMillis.getAndSet(NO_VALUE);
130+
if (maxQueueLatencyValue != NO_VALUE) {
131+
return Set.of(new LongWithAttributes(maxQueueLatencyValue));
132+
} else {
133+
return Set.of();
134+
}
135+
}
120136
}

server/src/main/java/org/elasticsearch/cluster/ClusterModule.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator;
4444
import org.elasticsearch.cluster.routing.allocation.allocator.BalancerSettings;
4545
import org.elasticsearch.cluster.routing.allocation.allocator.BalancingWeightsFactory;
46+
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics;
4647
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator;
4748
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator.DesiredBalanceReconcilerAction;
4849
import org.elasticsearch.cluster.routing.allocation.allocator.GlobalBalancingWeightsFactory;
@@ -138,6 +139,7 @@ public class ClusterModule extends AbstractModule {
138139
private final ShardRoutingRoleStrategy shardRoutingRoleStrategy;
139140
private final AllocationStatsService allocationStatsService;
140141
private final TelemetryProvider telemetryProvider;
142+
private final DesiredBalanceMetrics desiredBalanceMetrics;
141143

142144
public ClusterModule(
143145
Settings settings,
@@ -164,6 +166,7 @@ public ClusterModule(
164166
writeLoadForecaster,
165167
balancingWeightsFactory
166168
);
169+
this.desiredBalanceMetrics = new DesiredBalanceMetrics(telemetryProvider.getMeterRegistry());
167170
this.shardsAllocator = createShardsAllocator(
168171
settings,
169172
clusterService.getClusterSettings(),
@@ -174,9 +177,9 @@ public ClusterModule(
174177
clusterService,
175178
this::reconcile,
176179
writeLoadForecaster,
177-
telemetryProvider,
178180
nodeAllocationStatsAndWeightsCalculator,
179-
this::explainShardAllocation
181+
this::explainShardAllocation,
182+
desiredBalanceMetrics
180183
);
181184
this.clusterService = clusterService;
182185
this.indexNameExpressionResolver = new IndexNameExpressionResolver(threadPool.getThreadContext(), systemIndices, projectResolver);
@@ -497,9 +500,9 @@ private static ShardsAllocator createShardsAllocator(
497500
ClusterService clusterService,
498501
DesiredBalanceReconcilerAction reconciler,
499502
WriteLoadForecaster writeLoadForecaster,
500-
TelemetryProvider telemetryProvider,
501503
NodeAllocationStatsAndWeightsCalculator nodeAllocationStatsAndWeightsCalculator,
502-
ShardAllocationExplainer shardAllocationExplainer
504+
ShardAllocationExplainer shardAllocationExplainer,
505+
DesiredBalanceMetrics desiredBalanceMetrics
503506
) {
504507
Map<String, Supplier<ShardsAllocator>> allocators = new HashMap<>();
505508
allocators.put(
@@ -514,9 +517,9 @@ private static ShardsAllocator createShardsAllocator(
514517
threadPool,
515518
clusterService,
516519
reconciler,
517-
telemetryProvider,
518520
nodeAllocationStatsAndWeightsCalculator,
519-
shardAllocationExplainer
521+
shardAllocationExplainer,
522+
desiredBalanceMetrics
520523
)
521524
);
522525

@@ -561,6 +564,7 @@ protected void configure() {
561564
bind(ShardRoutingRoleStrategy.class).toInstance(shardRoutingRoleStrategy);
562565
bind(AllocationStatsService.class).toInstance(allocationStatsService);
563566
bind(TelemetryProvider.class).toInstance(telemetryProvider);
567+
bind(DesiredBalanceMetrics.class).toInstance(desiredBalanceMetrics);
564568
bind(MetadataRolloverService.class).asEagerSingleton();
565569
}
566570

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
import org.elasticsearch.telemetry.metric.MeterRegistry;
1919

2020
import java.util.ArrayList;
21+
import java.util.Collection;
2122
import java.util.List;
2223
import java.util.Map;
2324
import java.util.concurrent.atomic.AtomicReference;
25+
import java.util.function.Supplier;
2426
import java.util.function.ToLongFunction;
2527

2628
/**
@@ -31,6 +33,8 @@
3133
*/
3234
public class DesiredBalanceMetrics {
3335

36+
/**
 * Shared instance constructed over {@link MeterRegistry#NOOP}. Declared {@code final} so this
 * public constant cannot be reassigned by other code.
 */
public static final DesiredBalanceMetrics NOOP = new DesiredBalanceMetrics(MeterRegistry.NOOP);
37+
3438
/**
3539
* @param unassignedShards Shards that are not assigned to any node.
3640
* @param allocationStatsByRole A breakdown of the allocations stats by {@link ShardRouting.Role}
@@ -124,8 +128,12 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w
124128
public static final String CURRENT_NODE_FORECASTED_DISK_USAGE_METRIC_NAME =
125129
"es.allocator.allocations.node.forecasted_disk_usage_bytes.current";
126130

131+
// Decider metrics
132+
public static final String WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE = "es.allocator.deciders.write_load.max_latency_value.current";
133+
127134
public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats(0, Map.of());
128135

136+
private final MeterRegistry meterRegistry;
129137
private volatile boolean nodeIsMaster = false;
130138

131139
/**
@@ -153,6 +161,7 @@ public void updateMetrics(
153161
}
154162

155163
public DesiredBalanceMetrics(MeterRegistry meterRegistry) {
164+
this.meterRegistry = meterRegistry;
156165
meterRegistry.registerLongsGauge(
157166
UNASSIGNED_SHARDS_METRIC_NAME,
158167
"Current number of unassigned shards",
@@ -260,6 +269,15 @@ public AllocationStats allocationStats() {
260269
return lastReconciliationAllocationStats;
261270
}
262271

272+
/**
 * Registers a longs gauge named {@link #WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE}, with unit
 * {@code "ms"}, on the {@link MeterRegistry} this instance was constructed with.
 *
 * @param maxLatencySupplier supplier of the gauge's readings, passed through to the registry
 */
public void registerWriteLoadDeciderMaxLatencyGauge(Supplier<Collection<LongWithAttributes>> maxLatencySupplier) {
273+
meterRegistry.registerLongsGauge(
274+
WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE,
275+
"max latency for write load decider",
276+
"ms",
277+
maxLatencySupplier
278+
);
279+
}
280+
263281
/**
 * Returns the unassigned-shards readings by delegating to {@code getIfPublishing} with
 * {@code AllocationStats::unassignedShards} as the value extractor.
 * NOTE(review): presumably the supplier for the unassigned-shards gauge registered in the
 * constructor - confirm, as that registration is truncated in this view.
 */
private List<LongWithAttributes> getUnassignedShardsMetrics() {
264282
return getIfPublishing(AllocationStats::unassignedShards);
265283
}

0 commit comments

Comments
 (0)