Skip to content

Commit c4db2e3

Browse files
committed
Publish max queue latency metric
1 parent 1ff6608 commit c4db2e3

File tree

3 files changed

+68
-9
lines changed

3 files changed

+68
-9
lines changed

server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,30 @@
99

1010
package org.elasticsearch.action.admin.cluster.node.usage;
1111

12-
import org.apache.logging.log4j.LogManager;
13-
import org.apache.logging.log4j.Logger;
1412
import org.elasticsearch.action.ActionType;
1513
import org.elasticsearch.action.FailedNodeException;
1614
import org.elasticsearch.action.support.ActionFilters;
1715
import org.elasticsearch.action.support.nodes.TransportNodesAction;
1816
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
1917
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools.ThreadPoolUsageStats;
2018
import org.elasticsearch.cluster.node.DiscoveryNode;
19+
import org.elasticsearch.cluster.routing.allocation.AllocationDeciderMetrics;
2120
import org.elasticsearch.cluster.service.ClusterService;
2221
import org.elasticsearch.common.io.stream.StreamInput;
2322
import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
2423
import org.elasticsearch.injection.guice.Inject;
2524
import org.elasticsearch.tasks.Task;
25+
import org.elasticsearch.telemetry.metric.LongWithAttributes;
2626
import org.elasticsearch.threadpool.ThreadPool;
2727
import org.elasticsearch.transport.TransportService;
2828

2929
import java.io.IOException;
30+
import java.util.Collection;
3031
import java.util.HashMap;
3132
import java.util.List;
3233
import java.util.Map;
34+
import java.util.Set;
35+
import java.util.concurrent.atomic.AtomicLong;
3336

3437
/**
3538
* Collects some thread pool stats from each data node for purposes of shard allocation balancing. The specific stats are defined in
@@ -42,20 +45,21 @@ public class TransportNodeUsageStatsForThreadPoolsAction extends TransportNodesA
4245
NodeUsageStatsForThreadPoolsAction.NodeResponse,
4346
Void> {
4447

45-
private static final Logger logger = LogManager.getLogger(TransportNodeUsageStatsForThreadPoolsAction.class);
46-
4748
public static final String NAME = "internal:monitor/thread_pool/stats";
4849
public static final ActionType<NodeUsageStatsForThreadPoolsAction.Response> TYPE = new ActionType<>(NAME);
50+
private static final int NO_VALUE = -1;
4951

5052
private final ThreadPool threadPool;
5153
private final ClusterService clusterService;
54+
private final AtomicLong lastMaxQueueLatencyMillis = new AtomicLong(NO_VALUE);
5255

5356
@Inject
5457
public TransportNodeUsageStatsForThreadPoolsAction(
5558
ThreadPool threadPool,
5659
ClusterService clusterService,
5760
TransportService transportService,
58-
ActionFilters actionFilters
61+
ActionFilters actionFilters,
62+
AllocationDeciderMetrics allocationDeciderMetrics
5963
) {
6064
super(
6165
NAME,
@@ -67,6 +71,7 @@ public TransportNodeUsageStatsForThreadPoolsAction(
6771
);
6872
this.threadPool = threadPool;
6973
this.clusterService = clusterService;
74+
allocationDeciderMetrics.registerWriteLoadDeciderMaxLatencyGauge(this::getMaxQueueLatencyMetric);
7075
}
7176

7277
@Override
@@ -99,15 +104,17 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation(
99104
assert writeExecutor instanceof TaskExecutionTimeTrackingEsThreadPoolExecutor;
100105
var trackingForWriteExecutor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) writeExecutor;
101106

107+
long maxQueueLatencyMillis = Math.max(
108+
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(),
109+
trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis()
110+
);
111+
lastMaxQueueLatencyMillis.set(maxQueueLatencyMillis);
102112
ThreadPoolUsageStats threadPoolUsageStats = new ThreadPoolUsageStats(
103113
trackingForWriteExecutor.getMaximumPoolSize(),
104114
(float) trackingForWriteExecutor.pollUtilization(
105115
TaskExecutionTimeTrackingEsThreadPoolExecutor.UtilizationTrackingPurpose.ALLOCATION
106116
),
107-
Math.max(
108-
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(),
109-
trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis()
110-
)
117+
maxQueueLatencyMillis
111118
);
112119

113120
Map<String, ThreadPoolUsageStats> perThreadPool = new HashMap<>();
@@ -117,4 +124,13 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation(
117124
new NodeUsageStatsForThreadPools(localNode.getId(), perThreadPool)
118125
);
119126
}
127+
128+
private Collection<LongWithAttributes> getMaxQueueLatencyMetric() {
129+
long maxQueueLatencyValue = lastMaxQueueLatencyMillis.getAndSet(NO_VALUE);
130+
if (maxQueueLatencyValue != NO_VALUE) {
131+
return Set.of(new LongWithAttributes(maxQueueLatencyValue));
132+
} else {
133+
return Set.of();
134+
}
135+
}
120136
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.cluster.routing.allocation;
11+
12+
import org.elasticsearch.telemetry.metric.LongWithAttributes;
13+
import org.elasticsearch.telemetry.metric.MeterRegistry;
14+
15+
import java.util.Collection;
16+
import java.util.function.Supplier;
17+
18+
/**
19+
* A place where metrics related to allocation deciders can live
20+
*/
21+
public class AllocationDeciderMetrics {
22+
23+
public static final String WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE = "es.allocation.decider.write_load.max_latency_value.current";
24+
25+
private final MeterRegistry meterRegistry;
26+
27+
public AllocationDeciderMetrics(MeterRegistry meterRegistry) {
28+
this.meterRegistry = meterRegistry;
29+
}
30+
31+
public void registerWriteLoadDeciderMaxLatencyGauge(Supplier<Collection<LongWithAttributes>> maxLatencySupplier) {
32+
meterRegistry.registerLongsGauge(
33+
WRITE_LOAD_DECIDER_MAX_LATENCY_VALUE,
34+
"max latency for write load decider",
35+
"unit",
36+
maxLatencySupplier
37+
);
38+
}
39+
}

server/src/main/java/org/elasticsearch/node/NodeConstruction.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import org.elasticsearch.cluster.project.ProjectResolverFactory;
5959
import org.elasticsearch.cluster.routing.BatchedRerouteService;
6060
import org.elasticsearch.cluster.routing.RerouteService;
61+
import org.elasticsearch.cluster.routing.allocation.AllocationDeciderMetrics;
6162
import org.elasticsearch.cluster.routing.allocation.AllocationService;
6263
import org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitor;
6364
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintMonitor;
@@ -1234,6 +1235,8 @@ public Map<String, String> queryFields() {
12341235

12351236
final ShutdownPrepareService shutdownPrepareService = new ShutdownPrepareService(settings, httpServerTransport, terminationHandler);
12361237

1238+
final AllocationDeciderMetrics allocationDeciderMetrics = new AllocationDeciderMetrics(telemetryProvider.getMeterRegistry());
1239+
12371240
modules.add(loadPersistentTasksService(settingsModule, clusterService, threadPool, clusterModule.getIndexNameExpressionResolver()));
12381241

12391242
modules.add(
@@ -1324,6 +1327,7 @@ public Map<String, String> queryFields() {
13241327
b.bind(ShutdownPrepareService.class).toInstance(shutdownPrepareService);
13251328
b.bind(OnlinePrewarmingService.class).toInstance(onlinePrewarmingService);
13261329
b.bind(MergeMetrics.class).toInstance(mergeMetrics);
1330+
b.bind(AllocationDeciderMetrics.class).toInstance(allocationDeciderMetrics);
13271331
});
13281332

13291333
if (ReadinessService.enabled(environment)) {

0 commit comments

Comments
 (0)