Skip to content

Commit b816a15

Browse files
committed
Include hot-spotted and under-threshold node IDs in reason, in-line callReroute
1 parent d3308d2 commit b816a15

File tree

1 file changed

+24
-16
lines changed

1 file changed

+24
-16
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintMonitor.java

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
*/
3737
public class WriteLoadConstraintMonitor {
3838
private static final Logger logger = LogManager.getLogger(WriteLoadConstraintMonitor.class);
39+
private static final int MAX_NODE_IDS_IN_MESSAGE = 3;
3940
private final WriteLoadConstraintSettings writeLoadConstraintSettings;
4041
private final Supplier<ClusterState> clusterStateSupplier;
4142
private final LongSupplier currentTimeMillisSupplier;
@@ -116,26 +117,33 @@ public void onNewInfo(ClusterInfo clusterInfo) {
116117

117118
if (haveCalledRerouteRecently == false
118119
|| Sets.difference(nodeIdsExceedingLatencyThreshold, lastSetOfHotSpottedNodes).isEmpty() == false) {
119-
callReroute(nodeIdsExceedingLatencyThreshold);
120+
final String reason = Strings.format(
121+
"write load constraint monitor: "
122+
+ "Found %s exceeding the write thread pool queue latency threshold (%s below utilization threshold, %d total)",
123+
nodeSummary(nodeIdsExceedingLatencyThreshold),
124+
nodeSummary(nodeIdsBelowUtilizationThreshold),
125+
state.nodes().size()
126+
);
127+
rerouteService.reroute(
128+
reason,
129+
Priority.NORMAL,
130+
ActionListener.wrap(
131+
ignored -> logger.trace("{} reroute successful", reason),
132+
e -> logger.debug(() -> Strings.format("reroute failed, reason: %s", reason), e)
133+
)
134+
);
135+
lastRerouteTimeMillis = currentTimeMillisSupplier.getAsLong();
136+
lastSetOfHotSpottedNodes = nodeIdsExceedingLatencyThreshold;
120137
} else {
121138
logger.debug("Not calling reroute because we called reroute recently and there are no new hot spots");
122139
}
123140
}
124141

125-
private void callReroute(Set<String> hotSpottedNodes) {
126-
final String reason = Strings.format(
127-
"write load constraint monitor: Found %d node(s) exceeding the write thread pool queue latency threshold",
128-
hotSpottedNodes.size()
129-
);
130-
rerouteService.reroute(
131-
reason,
132-
Priority.NORMAL,
133-
ActionListener.wrap(
134-
ignored -> logger.trace("{} reroute successful", reason),
135-
e -> logger.debug(() -> Strings.format("reroute failed, reason: %s", reason), e)
136-
)
137-
);
138-
lastRerouteTimeMillis = currentTimeMillisSupplier.getAsLong();
139-
lastSetOfHotSpottedNodes = hotSpottedNodes;
142+
private static String nodeSummary(Set<String> nodeIds) {
143+
if (nodeIds.size() < MAX_NODE_IDS_IN_MESSAGE) {
144+
return "[" + String.join(", ", nodeIds) + "]";
145+
} else {
146+
return nodeIds.size() + " nodes";
147+
}
140148
}
141149
}

0 commit comments

Comments
 (0)