Skip to content

Commit 19b7475

Browse files
committed
Make resilient to metadata changes
1 parent ef8f45b commit 19b7475

File tree

2 files changed

+102
-45
lines changed

2 files changed

+102
-45
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/UndesiredAllocationsTracker.java

Lines changed: 57 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99

1010
package org.elasticsearch.cluster.routing.allocation.allocator;
1111

12-
import com.carrotsearch.hppc.ObjectLongHashMap;
13-
import com.carrotsearch.hppc.ObjectLongMap;
14-
1512
import org.elasticsearch.cluster.routing.RoutingNodes;
1613
import org.elasticsearch.cluster.routing.ShardRouting;
1714
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
@@ -20,12 +17,13 @@
2017
import org.elasticsearch.common.settings.Setting;
2118
import org.elasticsearch.common.time.TimeProvider;
2219
import org.elasticsearch.core.TimeValue;
23-
import org.elasticsearch.core.Tuple;
20+
import org.elasticsearch.index.shard.ShardId;
2421
import org.elasticsearch.logging.LogManager;
2522
import org.elasticsearch.logging.Logger;
2623

24+
import java.util.HashMap;
25+
import java.util.Map;
2726
import java.util.stream.Collectors;
28-
import java.util.stream.StreamSupport;
2927

3028
/**
3129
* Keeps track of a limited number of shards that are currently in undesired allocations. If the
@@ -70,7 +68,7 @@ public class UndesiredAllocationsTracker {
7068
);
7169

7270
private final TimeProvider timeProvider;
73-
private final ObjectLongHashMap<ShardRouting> undesiredAllocations = new ObjectLongHashMap<>();
71+
private final Map<String, UndesiredAllocation> undesiredAllocations = new HashMap<>();
7472
private final FrequencyCappedAction undesiredAllocationDurationLogInterval;
7573
private volatile TimeValue undesiredAllocationDurationLoggingThreshold;
7674
private volatile int maxUndesiredAllocationsToTrack;
@@ -94,30 +92,36 @@ public class UndesiredAllocationsTracker {
9492
*/
9593
public void trackUndesiredAllocation(ShardRouting shardRouting) {
9694
assert shardRouting.unassigned() == false : "Shouldn't record unassigned shards as undesired allocations";
97-
if (undesiredAllocations.size() < maxUndesiredAllocationsToTrack && undesiredAllocations.containsKey(shardRouting) == false) {
98-
undesiredAllocations.put(shardRouting, timeProvider.relativeTimeInMillis());
95+
if (undesiredAllocations.size() < maxUndesiredAllocationsToTrack) {
96+
final var allocationId = shardRouting.allocationId().getId();
97+
if (undesiredAllocations.containsKey(allocationId) == false) {
98+
undesiredAllocations.put(
99+
allocationId,
100+
new UndesiredAllocation(shardRouting.shardId(), timeProvider.relativeTimeInMillis())
101+
);
102+
}
99103
}
100104
}
101105

102106
/**
103107
* Remove any tracking of the specified allocation (a no-op if the allocation isn't being tracked)
104108
*/
105109
public void removeTracking(ShardRouting shardRouting) {
106-
undesiredAllocations.remove(shardRouting);
110+
if (shardRouting.unassigned() == false) {
111+
undesiredAllocations.remove(shardRouting.allocationId().getId());
112+
} else {
113+
assert false : "Shouldn't remove tracking of unassigned shards";
114+
}
107115
}
108116

109117
/**
110118
* Clear any {@link ShardRouting} that are no longer present in the routing nodes
111119
*/
112120
public void cleanup(RoutingNodes routingNodes) {
113-
undesiredAllocations.removeAll(shardRouting -> {
114-
final var allocationId = shardRouting.allocationId();
115-
if (allocationId != null) {
116-
return routingNodes.getByAllocationId(shardRouting.shardId(), allocationId.getId()) == null;
117-
} else {
118-
assert false : "Unassigned shards shouldn't be marked as undesired";
119-
return true;
120-
}
121+
undesiredAllocations.entrySet().removeIf(e -> {
122+
final var undesiredAllocation = e.getValue();
123+
final var allocationId = e.getKey();
124+
return routingNodes.getByAllocationId(undesiredAllocation.shardId(), allocationId) == null;
121125
});
122126
shrinkIfOversized();
123127
}
@@ -140,9 +144,9 @@ public void maybeLogUndesiredShardsWarning(
140144
) {
141145
final long currentTimeMillis = timeProvider.relativeTimeInMillis();
142146
long earliestUndesiredTimestamp = Long.MAX_VALUE;
143-
for (var allocation : undesiredAllocations) {
144-
if (allocation.value < earliestUndesiredTimestamp) {
145-
earliestUndesiredTimestamp = allocation.value;
147+
for (var undesiredAllocation : undesiredAllocations.values()) {
148+
if (undesiredAllocation.undesiredSince() < earliestUndesiredTimestamp) {
149+
earliestUndesiredTimestamp = undesiredAllocation.undesiredSince();
146150
}
147151
}
148152
if (earliestUndesiredTimestamp < currentTimeMillis
@@ -160,15 +164,22 @@ private void logDecisionsForUndesiredShardsOverThreshold(
160164
) {
161165
final long currentTimeMillis = timeProvider.relativeTimeInMillis();
162166
final long loggingThresholdTimestamp = currentTimeMillis - undesiredAllocationDurationLoggingThreshold.millis();
163-
for (var allocation : undesiredAllocations) {
164-
if (allocation.value < loggingThresholdTimestamp) {
165-
logUndesiredShardDetails(
166-
allocation.key,
167-
TimeValue.timeValueMillis(currentTimeMillis - allocation.value),
168-
routingNodes,
169-
routingAllocation,
170-
desiredBalance
171-
);
167+
for (var allocation : undesiredAllocations.entrySet()) {
168+
final var undesiredAllocation = allocation.getValue();
169+
final var allocationId = allocation.getKey();
170+
if (undesiredAllocation.undesiredSince() < loggingThresholdTimestamp) {
171+
final var shardRouting = routingNodes.getByAllocationId(undesiredAllocation.shardId(), allocationId);
172+
if (shardRouting != null) {
173+
logUndesiredShardDetails(
174+
shardRouting,
175+
TimeValue.timeValueMillis(currentTimeMillis - undesiredAllocation.undesiredSince()),
176+
routingNodes,
177+
routingAllocation,
178+
desiredBalance
179+
);
180+
} else {
181+
assert false : undesiredAllocation + " for allocationID " + allocationId + " was not cleaned up";
182+
}
172183
}
173184
}
174185
}
@@ -200,19 +211,28 @@ private void logUndesiredShardDetails(
200211
*/
201212
private void shrinkIfOversized() {
202213
if (undesiredAllocations.size() > maxUndesiredAllocationsToTrack) {
203-
final var newestExcessValues = StreamSupport.stream(undesiredAllocations.spliterator(), false)
204-
// we need to take a copy from the cursor because the cursors are re-used, so don't work with #sorted
205-
.map(cursor -> new Tuple<>(cursor.key, cursor.value))
206-
.sorted((a, b) -> Long.compare(b.v2(), a.v2()))
214+
final var newestExcessAllocationIds = undesiredAllocations.entrySet()
215+
.stream()
216+
.sorted((a, b) -> Long.compare(b.getValue().undesiredSince(), a.getValue().undesiredSince()))
207217
.limit(undesiredAllocations.size() - maxUndesiredAllocationsToTrack)
208-
.map(Tuple::v1)
218+
.map(Map.Entry::getKey)
209219
.collect(Collectors.toSet());
210-
undesiredAllocations.removeAll(newestExcessValues::contains);
220+
undesiredAllocations.keySet().removeAll(newestExcessAllocationIds);
211221
}
212222
}
213223

214224
// visible for testing
215-
ObjectLongMap<ShardRouting> getUndesiredAllocations() {
216-
return undesiredAllocations.clone();
225+
Map<String, UndesiredAllocation> getUndesiredAllocations() {
226+
return Map.copyOf(undesiredAllocations);
217227
}
228+
229+
/**
230+
* Rather than storing the {@link ShardRouting}, we store a map of allocationId -> {@link UndesiredAllocation}
231+
* this is because the allocation ID will persist as long as a shard stays on the same node, but the
232+
* {@link ShardRouting} changes for a variety of reasons even when the shard doesn't move.
233+
*
234+
* @param shardId The shard ID
235+
* @param undesiredSince The timestamp when the shard was first observed in an undesired allocation
236+
*/
237+
record UndesiredAllocation(ShardId shardId, long undesiredSince) {}
218238
}

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/UndesiredAllocationsTrackerTests.java

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131

3232
import java.util.stream.Collectors;
3333
import java.util.stream.IntStream;
34-
import java.util.stream.StreamSupport;
3534

3635
public class UndesiredAllocationsTrackerTests extends ESTestCase {
3736

@@ -70,9 +69,14 @@ public void testShardsArePrunedWhenRemovedFromRoutingTable() {
7069
RoutingNodes.immutable(stateWithIndexRemoved.globalRoutingTable(), stateWithIndexRemoved.nodes())
7170
);
7271
assertEquals((numberOfIndices - 1) * primaryShards, undesiredAllocationsTracker.getUndesiredAllocations().size());
73-
undesiredAllocationsTracker.getUndesiredAllocations()
74-
.iterator()
75-
.forEachRemaining(olc -> assertNotNull(stateWithIndexRemoved.routingTable(ProjectId.DEFAULT).index(olc.key.index())));
72+
assertTrue(
73+
undesiredAllocationsTracker.getUndesiredAllocations()
74+
.values()
75+
.stream()
76+
.allMatch(
77+
allocation -> stateWithIndexRemoved.routingTable(ProjectId.DEFAULT).index(allocation.shardId().getIndex()) != null
78+
)
79+
);
7680
}
7781

7882
public void testNewestRecordsArePurgedWhenLimitIsDecreased() {
@@ -110,8 +114,10 @@ public void testNewestRecordsArePurgedWhenLimitIsDecreased() {
110114
// We should purge the most recent entries in #cleanup
111115
undesiredAllocationsTracker.cleanup(routingNodes);
112116
assertEquals(reducedMaximum, undesiredAllocationsTracker.getUndesiredAllocations().size());
113-
final var remainingShardIds = StreamSupport.stream(undesiredAllocationsTracker.getUndesiredAllocations().spliterator(), false)
114-
.map(olc -> olc.key.shardId().id())
117+
final var remainingShardIds = undesiredAllocationsTracker.getUndesiredAllocations()
118+
.values()
119+
.stream()
120+
.map(allocation -> allocation.shardId().id())
115121
.collect(Collectors.toSet());
116122
assertEquals(IntStream.range(0, reducedMaximum).boxed().collect(Collectors.toSet()), remainingShardIds);
117123
}
@@ -133,12 +139,43 @@ public void testCannotTrackMoreShardsThanTheLimit() {
133139

134140
// Only the first {maxToTrack} shards should be tracked
135141
assertEquals(maxToTrack, undesiredAllocationsTracker.getUndesiredAllocations().size());
136-
final var trackedShardIds = StreamSupport.stream(undesiredAllocationsTracker.getUndesiredAllocations().spliterator(), false)
137-
.map(olc -> olc.key.shardId().id())
142+
final var trackedShardIds = undesiredAllocationsTracker.getUndesiredAllocations()
143+
.values()
144+
.stream()
145+
.map(allocation -> allocation.shardId().id())
138146
.collect(Collectors.toSet());
139147
assertEquals(IntStream.range(0, maxToTrack).boxed().collect(Collectors.toSet()), trackedShardIds);
140148
}
141149

150+
public void testUndesiredAllocationsAreIdentifiableDespiteMetadataChanges() {
151+
final var clusterSettings = ClusterSettings.createBuiltInClusterSettings(
152+
Settings.builder().put(UndesiredAllocationsTracker.MAX_UNDESIRED_ALLOCATIONS_TO_TRACK.getKey(), randomIntBetween(1, 10)).build()
153+
);
154+
final var advancingTimeProvider = new AdvancingTimeProvider();
155+
final var undesiredAllocationsTracker = new UndesiredAllocationsTracker(clusterSettings, advancingTimeProvider);
156+
final var index = new Index(randomIdentifier(), randomIdentifier());
157+
158+
ShardRouting shardRouting = createAssignedRouting(index, 0);
159+
160+
undesiredAllocationsTracker.trackUndesiredAllocation(shardRouting);
161+
assertEquals(1, undesiredAllocationsTracker.getUndesiredAllocations().size());
162+
163+
// move to started
164+
shardRouting = shardRouting.moveToStarted(randomNonNegativeLong());
165+
undesiredAllocationsTracker.trackUndesiredAllocation(shardRouting);
166+
assertEquals(1, undesiredAllocationsTracker.getUndesiredAllocations().size());
167+
168+
// start a relocation
169+
shardRouting = shardRouting.relocate(randomIdentifier(), randomNonNegativeLong());
170+
undesiredAllocationsTracker.trackUndesiredAllocation(shardRouting);
171+
assertEquals(1, undesiredAllocationsTracker.getUndesiredAllocations().size());
172+
173+
// cancel that relocation
174+
shardRouting = shardRouting.cancelRelocation();
175+
undesiredAllocationsTracker.removeTracking(shardRouting);
176+
assertEquals(0, undesiredAllocationsTracker.getUndesiredAllocations().size());
177+
}
178+
142179
private ClusterState removeRandomIndex(ClusterState state) {
143180
RoutingTable originalRoutingTable = state.routingTable(ProjectId.DEFAULT);
144181
RoutingTable updatedRoutingTable = RoutingTable.builder(originalRoutingTable)

0 commit comments

Comments
 (0)