Skip to content

Commit 81eb6e5

Browse files
committed
Call reroute even if there are no nodes under utilisation/latency threshold
1 parent 06beb49 commit 81eb6e5

File tree

2 files changed

+1
-95
lines changed

2 files changed

+1
-95
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintMonitor.java

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -75,30 +75,20 @@ public void onNewInfo(ClusterInfo clusterInfo) {
7575

7676
final int numberOfNodes = clusterInfo.getNodeUsageStatsForThreadPools().size();
7777
final Set<String> nodeIdsExceedingLatencyThreshold = Sets.newHashSetWithExpectedSize(numberOfNodes);
78-
final Set<String> potentialRelocationTargets = Sets.newHashSetWithExpectedSize(numberOfNodes);
7978
clusterInfo.getNodeUsageStatsForThreadPools().forEach((nodeId, usageStats) -> {
8079
final NodeUsageStatsForThreadPools.ThreadPoolUsageStats writeThreadPoolStats = usageStats.threadPoolUsageStatsMap()
8180
.get(ThreadPool.Names.WRITE);
8281
assert writeThreadPoolStats != null : "Write thread pool is not publishing usage stats for node [" + nodeId + "]";
8382
if (writeThreadPoolStats.maxThreadPoolQueueLatencyMillis() > writeLoadConstraintSettings.getQueueLatencyThreshold().millis()) {
8483
nodeIdsExceedingLatencyThreshold.add(nodeId);
85-
} else if (writeThreadPoolStats.averageThreadPoolUtilization() <= writeLoadConstraintSettings.getHighUtilizationThreshold()) {
86-
potentialRelocationTargets.add(nodeId);
8784
}
8885
});
89-
assert Sets.intersection(nodeIdsExceedingLatencyThreshold, potentialRelocationTargets).isEmpty()
90-
: "We assume any nodes exceeding the latency threshold are not viable targets for relocation";
9186

9287
if (nodeIdsExceedingLatencyThreshold.isEmpty()) {
9388
logger.debug("No hot-spotting nodes detected");
9489
return;
9590
}
9691

97-
if (potentialRelocationTargets.isEmpty()) {
98-
logger.debug("No nodes are suitable as relocation targets");
99-
return;
100-
}
101-
10292
final long currentTimeMillis = currentTimeMillisSupplier.getAsLong();
10393
final long timeSinceLastRerouteMillis = currentTimeMillis - lastRerouteTimeMillis;
10494
final boolean haveCalledRerouteRecently = timeSinceLastRerouteMillis < writeLoadConstraintSettings.getMinimumRerouteInterval()
@@ -108,9 +98,8 @@ public void onNewInfo(ClusterInfo clusterInfo) {
10898
|| Sets.difference(nodeIdsExceedingLatencyThreshold, lastSetOfHotSpottedNodes).isEmpty() == false) {
10999
if (logger.isDebugEnabled()) {
110100
logger.debug(
111-
"Found {} exceeding the write thread pool queue latency threshold ({} with capacity, {} total), triggering reroute",
101+
"Found {} exceeding the write thread pool queue latency threshold ({} total), triggering reroute",
112102
nodeSummary(nodeIdsExceedingLatencyThreshold),
113-
nodeSummary(potentialRelocationTargets),
114103
state.nodes().size()
115104
);
116105
}

server/src/test/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintMonitorTests.java

Lines changed: 0 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -164,89 +164,6 @@ public void testRerouteIsNotCalledWhenNoNodesAreHotSpotting() {
164164
}
165165
}
166166

167-
@TestLogging(
168-
value = "org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintMonitor:DEBUG",
169-
reason = "ensure we're skipping reroute for the right reason"
170-
)
171-
public void testRerouteIsNotCalledWhenNoNodeIsUnderUtilizationThreshold() {
172-
final TestState testState = createRandomTestStateThatWillTriggerReroute();
173-
174-
// Transform the node usage stats so that all nodes are at the high-utilization threshold
175-
final var nodeUsageStatsWithHighUtilization = Maps.transformValues(
176-
testState.clusterInfo.getNodeUsageStatsForThreadPools(),
177-
stats -> new NodeUsageStatsForThreadPools(
178-
stats.nodeId(),
179-
Maps.transformValues(
180-
stats.threadPoolUsageStatsMap(),
181-
tpStats -> new NodeUsageStatsForThreadPools.ThreadPoolUsageStats(
182-
tpStats.totalThreadPoolThreads(),
183-
(testState.highUtilizationThresholdPercent + 1) / 100f,
184-
tpStats.maxThreadPoolQueueLatencyMillis()
185-
)
186-
)
187-
)
188-
);
189-
190-
final WriteLoadConstraintMonitor writeLoadConstraintMonitor = new WriteLoadConstraintMonitor(
191-
testState.clusterSettings,
192-
testState.currentTimeSupplier,
193-
() -> testState.clusterState,
194-
testState.mockRerouteService
195-
);
196-
197-
try (MockLog mockLog = MockLog.capture(WriteLoadConstraintMonitor.class)) {
198-
mockLog.addExpectation(
199-
new MockLog.SeenEventExpectation(
200-
"don't reroute due to all nodes exceeding utilization threshold",
201-
WriteLoadConstraintMonitor.class.getCanonicalName(),
202-
Level.DEBUG,
203-
"No nodes are suitable as relocation targets"
204-
)
205-
);
206-
writeLoadConstraintMonitor.onNewInfo(
207-
ClusterInfo.builder().nodeUsageStatsForThreadPools(nodeUsageStatsWithHighUtilization).build()
208-
);
209-
mockLog.assertAllExpectationsMatched();
210-
verifyNoInteractions(testState.mockRerouteService);
211-
}
212-
}
213-
214-
@TestLogging(
215-
value = "org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintMonitor:DEBUG",
216-
reason = "ensure we're skipping reroute for the right reason"
217-
)
218-
public void testRerouteIsNotCalledWhenNoNodeIsUnderLatencyThreshold() {
219-
final TestState testState = createRandomTestStateThatWillTriggerReroute();
220-
221-
final ClusterInfo clusterInfoWithAllNodesOverLatencyThreshold = createClusterInfoWithHotSpots(
222-
testState.clusterState,
223-
testState.numberOfNodes,
224-
testState.latencyThresholdMillis,
225-
testState.highUtilizationThresholdPercent
226-
);
227-
228-
final WriteLoadConstraintMonitor writeLoadConstraintMonitor = new WriteLoadConstraintMonitor(
229-
testState.clusterSettings,
230-
testState.currentTimeSupplier,
231-
() -> testState.clusterState,
232-
testState.mockRerouteService
233-
);
234-
235-
try (MockLog mockLog = MockLog.capture(WriteLoadConstraintMonitor.class)) {
236-
mockLog.addExpectation(
237-
new MockLog.SeenEventExpectation(
238-
"don't reroute due to all nodes exceeding latency threshold",
239-
WriteLoadConstraintMonitor.class.getCanonicalName(),
240-
Level.DEBUG,
241-
"No nodes are suitable as relocation targets"
242-
)
243-
);
244-
writeLoadConstraintMonitor.onNewInfo(clusterInfoWithAllNodesOverLatencyThreshold);
245-
mockLog.assertAllExpectationsMatched();
246-
verifyNoInteractions(testState.mockRerouteService);
247-
}
248-
}
249-
250167
@TestLogging(
251168
value = "org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintMonitor:DEBUG",
252169
reason = "ensure we're skipping reroute for the right reason"

0 commit comments

Comments
 (0)