Commit 31e3c55

Implement WriteLoadConstraintMonitor (#132917)
1 parent 4a745b5 commit 31e3c55

File tree

5 files changed: +481 -67 lines

server/src/main/java/org/elasticsearch/cluster/NodeUsageStatsForThreadPools.java

Lines changed: 1 addition & 29 deletions
@@ -89,33 +89,5 @@ public void writeTo(StreamOutput out) throws IOException {
             out.writeFloat(this.averageThreadPoolUtilization);
             out.writeVLong(this.maxThreadPoolQueueLatencyMillis);
         }
-
-        @Override
-        public int hashCode() {
-            return Objects.hash(totalThreadPoolThreads, averageThreadPoolUtilization, maxThreadPoolQueueLatencyMillis);
-        }
-
-        @Override
-        public String toString() {
-            return "[totalThreadPoolThreads="
-                + totalThreadPoolThreads
-                + ", averageThreadPoolUtilization="
-                + averageThreadPoolUtilization
-                + ", maxThreadPoolQueueLatencyMillis="
-                + maxThreadPoolQueueLatencyMillis
-                + "]";
-        }
-
-        @Override
-        public boolean equals(Object o) {
-            if (this == o) return true;
-            if (o == null || getClass() != o.getClass()) return false;
-            ThreadPoolUsageStats other = (ThreadPoolUsageStats) o;
-            return totalThreadPoolThreads == other.totalThreadPoolThreads
-                && averageThreadPoolUtilization == other.averageThreadPoolUtilization
-                && maxThreadPoolQueueLatencyMillis == other.maxThreadPoolQueueLatencyMillis;
-        }
-
-    } // ThreadPoolUsageStats
-
+    }
 }
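
Note: the deleted hashCode/toString/equals overrides look like redundant boilerplate. Assuming ThreadPoolUsageStats is a Java record (its components are read through accessor calls such as maxThreadPoolQueueLatencyMillis() elsewhere in this commit), the compiler already generates effectively equivalent members from the record components. A minimal standalone sketch with a hypothetical stand-in record, not code from the commit:

// Hypothetical stand-in for the nested ThreadPoolUsageStats record.
public class RecordDefaultsDemo {
    record Stats(int totalThreadPoolThreads, float averageThreadPoolUtilization, long maxThreadPoolQueueLatencyMillis) {}

    public static void main(String[] args) {
        Stats a = new Stats(8, 0.75f, 1200L);
        Stats b = new Stats(8, 0.75f, 1200L);
        // Compiler-generated members compare and print all record components.
        System.out.println(a.equals(b));                   // true
        System.out.println(a.hashCode() == b.hashCode());  // true
        System.out.println(a);                             // Stats[totalThreadPoolThreads=8, ...]
    }
}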

server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintMonitor.java

Lines changed: 57 additions & 16 deletions
@@ -15,26 +15,32 @@
 import org.elasticsearch.cluster.ClusterInfo;
 import org.elasticsearch.cluster.ClusterInfoService;
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
 import org.elasticsearch.cluster.routing.RerouteService;
 import org.elasticsearch.common.Priority;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.ClusterSettings;
+import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.gateway.GatewayService;
+import org.elasticsearch.threadpool.ThreadPool;
 
+import java.util.Set;
 import java.util.function.LongSupplier;
 import java.util.function.Supplier;
 
 /**
- * Monitors the node-level write thread pool usage across the cluster and initiates (coming soon) a rebalancing round (via
+ * Monitors the node-level write thread pool usage across the cluster and initiates a rebalancing round (via
  * {@link RerouteService#reroute}) whenever a node crosses the node-level write load thresholds.
- *
- * TODO (ES-11992): implement
  */
 public class WriteLoadConstraintMonitor {
     private static final Logger logger = LogManager.getLogger(WriteLoadConstraintMonitor.class);
+    private static final int MAX_NODE_IDS_IN_MESSAGE = 3;
     private final WriteLoadConstraintSettings writeLoadConstraintSettings;
     private final Supplier<ClusterState> clusterStateSupplier;
     private final LongSupplier currentTimeMillisSupplier;
     private final RerouteService rerouteService;
+    private volatile long lastRerouteTimeMillis = 0;
+    private volatile Set<String> lastSetOfHotSpottedNodes = Set.of();
 
     public WriteLoadConstraintMonitor(
         ClusterSettings clusterSettings,
@@ -60,29 +66,64 @@ public void onNewInfo(ClusterInfo clusterInfo) {
             return;
         }
 
-        if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled() == WriteLoadConstraintSettings.WriteLoadDeciderStatus.DISABLED) {
-            logger.trace("skipping monitor because the write load decider is disabled");
+        if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled().notFullyEnabled()) {
+            logger.debug("skipping monitor because the write load decider is not fully enabled");
             return;
         }
 
        logger.trace("processing new cluster info");
 
-        boolean reroute = false;
-        String explanation = "";
-        final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
+        final int numberOfNodes = clusterInfo.getNodeUsageStatsForThreadPools().size();
+        final Set<String> nodeIdsExceedingLatencyThreshold = Sets.newHashSetWithExpectedSize(numberOfNodes);
+        clusterInfo.getNodeUsageStatsForThreadPools().forEach((nodeId, usageStats) -> {
+            final NodeUsageStatsForThreadPools.ThreadPoolUsageStats writeThreadPoolStats = usageStats.threadPoolUsageStatsMap()
+                .get(ThreadPool.Names.WRITE);
+            assert writeThreadPoolStats != null : "Write thread pool is not publishing usage stats for node [" + nodeId + "]";
+            if (writeThreadPoolStats.maxThreadPoolQueueLatencyMillis() > writeLoadConstraintSettings.getQueueLatencyThreshold().millis()) {
+                nodeIdsExceedingLatencyThreshold.add(nodeId);
+            }
+        });
 
-        // TODO (ES-11992): implement
+        if (nodeIdsExceedingLatencyThreshold.isEmpty()) {
+            logger.debug("No hot-spotting nodes detected");
+            return;
+        }
 
-        if (reroute) {
-            logger.debug("rerouting shards: [{}]", explanation);
-            rerouteService.reroute("disk threshold monitor", Priority.NORMAL, ActionListener.wrap(ignored -> {
-                final var reroutedClusterState = clusterStateSupplier.get();
+        final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
+        final long timeSinceLastRerouteMillis = currentTimeMillis - lastRerouteTimeMillis;
+        final boolean haveCalledRerouteRecently = timeSinceLastRerouteMillis < writeLoadConstraintSettings.getMinimumRerouteInterval()
+            .millis();
 
-                // TODO (ES-11992): implement
+        if (haveCalledRerouteRecently == false
+            || Sets.difference(nodeIdsExceedingLatencyThreshold, lastSetOfHotSpottedNodes).isEmpty() == false) {
+            if (logger.isDebugEnabled()) {
+                logger.debug(
+                    "Found {} exceeding the write thread pool queue latency threshold ({} total), triggering reroute",
+                    nodeSummary(nodeIdsExceedingLatencyThreshold),
+                    state.nodes().size()
+                );
+            }
+            final String reason = "hot-spotting detected by write load constraint monitor";
+            rerouteService.reroute(
+                reason,
+                Priority.NORMAL,
+                ActionListener.wrap(
                    ignored -> logger.trace("{} reroute successful", reason),
+                    e -> logger.debug(() -> Strings.format("reroute failed, reason: %s", reason), e)
+                )
+            );
+            lastRerouteTimeMillis = currentTimeMillisSupplier.getAsLong();
+            lastSetOfHotSpottedNodes = nodeIdsExceedingLatencyThreshold;
+        } else {
+            logger.debug("Not calling reroute because we called reroute recently and there are no new hot spots");
+        }
+    }
 
-            }, e -> logger.debug("reroute failed", e)));
+    private static String nodeSummary(Set<String> nodeIds) {
+        if (nodeIds.isEmpty() == false && nodeIds.size() <= MAX_NODE_IDS_IN_MESSAGE) {
+            return "[" + String.join(", ", nodeIds) + "]";
         } else {
-            logger.trace("no reroute required");
+            return nodeIds.size() + " nodes";
         }
     }
 }
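
The new onNewInfo logic boils down to two checks: collect the IDs of nodes whose write thread pool queue latency exceeds the configured threshold, then trigger a reroute only if no reroute happened within the minimum interval or if a node is hot-spotting that was not hot-spotting last time. A minimal standalone sketch of that throttling rule, using plain java.util collections and hypothetical names rather than the Elasticsearch types:

import java.util.HashSet;
import java.util.Set;

// Standalone sketch (hypothetical names) of the reroute throttling rule above.
final class RerouteThrottleSketch {
    private long lastRerouteTimeMillis = 0;
    private Set<String> lastHotSpottedNodes = Set.of();

    boolean shouldReroute(Set<String> hotSpottedNodes, long nowMillis, long minimumIntervalMillis) {
        if (hotSpottedNodes.isEmpty()) {
            return false; // no node exceeded the queue latency threshold
        }
        boolean calledRecently = (nowMillis - lastRerouteTimeMillis) < minimumIntervalMillis;
        Set<String> newHotSpots = new HashSet<>(hotSpottedNodes);
        newHotSpots.removeAll(lastHotSpottedNodes); // like Sets.difference(current, last)
        boolean reroute = calledRecently == false || newHotSpots.isEmpty() == false;
        if (reroute) {
            lastRerouteTimeMillis = nowMillis;
            lastHotSpottedNodes = hotSpottedNodes;
        }
        return reroute;
    }

    public static void main(String[] args) {
        RerouteThrottleSketch sketch = new RerouteThrottleSketch();
        System.out.println(sketch.shouldReroute(Set.of("node-1"), 1_000, 60_000));            // true: first hot spot
        System.out.println(sketch.shouldReroute(Set.of("node-1"), 2_000, 60_000));            // false: recent reroute, no new hot spot
        System.out.println(sketch.shouldReroute(Set.of("node-1", "node-2"), 3_000, 60_000));  // true: node-2 is a new hot spot
    }
}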

server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintSettings.java

Lines changed: 20 additions & 21 deletions
@@ -107,41 +107,40 @@ public boolean disabled() {
         Setting.Property.NodeScope
     );
 
-    WriteLoadDeciderStatus writeLoadDeciderStatus;
-    TimeValue writeLoadDeciderRerouteIntervalSetting;
-    double writeThreadPoolHighUtilizationThresholdSetting;
+    private volatile WriteLoadDeciderStatus writeLoadDeciderStatus;
+    private volatile TimeValue minimumRerouteInterval;
+    private volatile double highUtilizationThreshold;
+    private volatile TimeValue queueLatencyThreshold;
 
     public WriteLoadConstraintSettings(ClusterSettings clusterSettings) {
-        clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_ENABLED_SETTING, this::setWriteLoadConstraintEnabled);
-        clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, this::setWriteLoadDeciderRerouteIntervalSetting);
+        clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_ENABLED_SETTING, status -> this.writeLoadDeciderStatus = status);
+        clusterSettings.initializeAndWatch(
+            WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING,
+            timeValue -> this.minimumRerouteInterval = timeValue
+        );
         clusterSettings.initializeAndWatch(
             WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING,
-            this::setWriteThreadPoolHighUtilizationThresholdSetting
+            value -> highUtilizationThreshold = value.getAsRatio()
        );
-
-    };
-
-    private void setWriteLoadConstraintEnabled(WriteLoadDeciderStatus status) {
-        this.writeLoadDeciderStatus = status;
+        clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_QUEUE_LATENCY_THRESHOLD_SETTING, value -> queueLatencyThreshold = value);
     }
 
     public WriteLoadDeciderStatus getWriteLoadConstraintEnabled() {
         return this.writeLoadDeciderStatus;
     }
 
-    public TimeValue getWriteLoadDeciderRerouteIntervalSetting() {
-        return this.writeLoadDeciderRerouteIntervalSetting;
+    public TimeValue getMinimumRerouteInterval() {
+        return this.minimumRerouteInterval;
     }
 
-    public double getWriteThreadPoolHighUtilizationThresholdSetting() {
-        return this.writeThreadPoolHighUtilizationThresholdSetting;
+    public TimeValue getQueueLatencyThreshold() {
+        return this.queueLatencyThreshold;
     }
 
-    private void setWriteLoadDeciderRerouteIntervalSetting(TimeValue timeValue) {
-        this.writeLoadDeciderRerouteIntervalSetting = timeValue;
-    }
-
-    private void setWriteThreadPoolHighUtilizationThresholdSetting(RatioValue percent) {
-        this.writeThreadPoolHighUtilizationThresholdSetting = percent.getAsRatio();
+    /**
+     * @return The threshold as a ratio - i.e. in [0, 1]
+     */
+    public double getHighUtilizationThreshold() {
+        return this.highUtilizationThreshold;
     }
 }
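
The settings class now caches each dynamic setting in a private volatile field that is written by the ClusterSettings watcher and read by the monitor and the decider from other threads, and the high-utilization threshold is exposed as a ratio in [0, 1]. A rough standalone sketch of that initialize-and-watch caching pattern, with hypothetical names standing in for the Elasticsearch types:

import java.util.function.Consumer;

// Rough sketch (hypothetical names) of the initialize-and-watch caching pattern used above.
final class DynamicSettingCacheSketch {
    private volatile long queueLatencyThresholdMillis;

    DynamicSettingCacheSketch(long initialMillis) {
        // Stand-in for ClusterSettings#initializeAndWatch(setting, consumer): apply the
        // initial value right away and re-apply the consumer on every later update.
        initializeAndWatch(initialMillis, value -> this.queueLatencyThresholdMillis = value);
    }

    private void initializeAndWatch(long initialValue, Consumer<Long> consumer) {
        consumer.accept(initialValue);
        // ...a real implementation would also register `consumer` for future setting updates...
    }

    long getQueueLatencyThreshold() {
        return queueLatencyThresholdMillis; // volatile read: callers on other threads see the latest value
    }

    public static void main(String[] args) {
        DynamicSettingCacheSketch settings = new DynamicSettingCacheSketch(5_000);
        System.out.println(settings.getQueueLatencyThreshold()); // 5000
    }
}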

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
         assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().isEmpty() == false;
         assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE) != null;
         var nodeWriteThreadPoolStats = nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
-        var nodeWriteThreadPoolLoadThreshold = writeLoadConstraintSettings.getWriteThreadPoolHighUtilizationThresholdSetting();
+        var nodeWriteThreadPoolLoadThreshold = writeLoadConstraintSettings.getHighUtilizationThreshold();
         if (nodeWriteThreadPoolStats.averageThreadPoolUtilization() >= nodeWriteThreadPoolLoadThreshold) {
             // The node's write thread pool usage stats already show high utilization above the threshold for accepting new shards.
             String explain = Strings.format(
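
The decider change is just the switch to the renamed getter; since getHighUtilizationThreshold() returns the threshold as a ratio in [0, 1], the comparison against the node's reported average utilization stays a plain numeric check. A tiny illustration with made-up values:

// Illustrative only; the values are hypothetical.
public class UtilizationThresholdDemo {
    public static void main(String[] args) {
        double highUtilizationThreshold = 0.90;      // e.g. a "90%" setting exposed as a ratio
        float averageThreadPoolUtilization = 0.93f;  // utilization reported in the node's usage stats
        // Mirrors the decider's check: at or above the threshold, the node should not accept new write shards.
        System.out.println(averageThreadPoolUtilization >= highUtilizationThreshold); // true
    }
}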
