|
9 | 9 |
|
10 | 10 | package org.elasticsearch.cluster.routing.allocation.allocator; |
11 | 11 |
|
| 12 | +import org.elasticsearch.cluster.node.DiscoveryNode; |
| 13 | +import org.elasticsearch.cluster.node.DiscoveryNodeRole; |
12 | 14 | import org.elasticsearch.cluster.routing.RoutingNodes; |
13 | 15 | import org.elasticsearch.cluster.routing.ShardRouting; |
14 | 16 | import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; |
|
17 | 19 | import org.elasticsearch.common.settings.Setting; |
18 | 20 | import org.elasticsearch.common.time.TimeProvider; |
19 | 21 | import org.elasticsearch.common.util.FeatureFlag; |
| 22 | +import org.elasticsearch.core.Releasable; |
20 | 23 | import org.elasticsearch.core.TimeValue; |
21 | 24 | import org.elasticsearch.index.shard.ShardId; |
22 | 25 | import org.elasticsearch.logging.LogManager; |
@@ -82,6 +85,7 @@ public class UndesiredAllocationsTracker { |
82 | 85 | private final FrequencyCappedAction undesiredAllocationDurationLogInterval; |
83 | 86 | private volatile TimeValue undesiredAllocationDurationLoggingThreshold; |
84 | 87 | private volatile int maxUndesiredAllocationsToTrack; |
| 88 | + private boolean missingAllocationAssertionsEnabled = true; |
85 | 89 |
|
86 | 90 | UndesiredAllocationsTracker(ClusterSettings clusterSettings, TimeProvider timeProvider) { |
87 | 91 | this.timeProvider = timeProvider; |
@@ -161,6 +165,14 @@ public void maybeLogUndesiredShardsWarning( |
161 | 165 | } |
162 | 166 | } |
163 | 167 |
|
| 168 | + private boolean shardTierMatchesNodeTier(ShardRouting shardRouting, DiscoveryNode discoveryNode) { |
| 169 | + return switch (shardRouting.role()) { |
| 170 | + case INDEX_ONLY -> discoveryNode.getRoles().contains(DiscoveryNodeRole.INDEX_ROLE); |
| 171 | + case SEARCH_ONLY -> discoveryNode.getRoles().contains(DiscoveryNodeRole.SEARCH_ROLE); |
| 172 | + default -> true; |
| 173 | + }; |
| 174 | + } |
| 175 | + |
164 | 176 | private void logDecisionsForUndesiredShardsOverThreshold( |
165 | 177 | RoutingNodes routingNodes, |
166 | 178 | RoutingAllocation routingAllocation, |
@@ -199,10 +211,21 @@ private void logUndesiredShardDetails( |
199 | 211 | allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS); |
200 | 212 | try { |
201 | 213 | final var assignment = desiredBalance.getAssignment(shardRouting.shardId()); |
202 | | - logger.warn("Shard {} has been in an undesired allocation for {}", shardRouting.shardId(), undesiredDuration); |
203 | | - for (final var nodeId : assignment.nodeIds()) { |
204 | | - final var decision = allocation.deciders().canAllocate(shardRouting, routingNodes.node(nodeId), allocation); |
205 | | - logger.warn("Shard {} allocation decision for node [{}]: {}", shardRouting.shardId(), nodeId, decision); |
| 214 | + if (assignment != null) { |
| 215 | + logger.warn("Shard {} has been in an undesired allocation for {}", shardRouting.shardId(), undesiredDuration); |
| 216 | + for (final var nodeId : assignment.nodeIds()) { |
| 217 | + if (allocation.nodes().nodeExists(nodeId)) { |
| 218 | + if (shardTierMatchesNodeTier(shardRouting, allocation.nodes().get(nodeId))) { |
| 219 | + final var decision = allocation.deciders().canAllocate(shardRouting, routingNodes.node(nodeId), allocation); |
| 220 | + logger.warn("Shard {} allocation decision for node [{}]: {}", shardRouting.shardId(), nodeId, decision); |
| 221 | + } |
| 222 | + } else { |
| 223 | + logger.warn("Shard {} desired node [{}] has left the cluster", shardRouting.shardId(), nodeId); |
| 224 | + } |
| 225 | + } |
| 226 | + } else { |
| 227 | + assert missingAllocationAssertionsEnabled == false |
| 228 | + : "Shard " + shardRouting + " was missing an assignment, this shouldn't be possible. " + desiredBalance; |
206 | 229 | } |
207 | 230 | } finally { |
208 | 231 | allocation.setDebugMode(originalDebugMode); |
@@ -239,4 +262,10 @@ Map<String, UndesiredAllocation> getUndesiredAllocations() { |
239 | 262 | * @param undesiredSince The timestamp when the shard was first observed in an undesired allocation |
240 | 263 | */ |
241 | 264 | record UndesiredAllocation(ShardId shardId, long undesiredSince) {} |
| 265 | + |
| 266 | + // Exposed for testing |
| 267 | + public Releasable disableMissingAllocationAssertions() { |
| 268 | + missingAllocationAssertionsEnabled = false; |
| 269 | + return () -> missingAllocationAssertionsEnabled = true; |
| 270 | + } |
242 | 271 | } |
0 commit comments