Skip to content

Commit bd0b090

Browse files
authored
Add more invariant checks to ISRT (#92705)
Today there are two invariants of all realistic `IndexShardRoutingTable` instances which are not enforced, nor are they satisfied by all instances created by tests: - every `IndexShardRoutingTable` must have exactly one primary shard - if the primary shard is not active (`STARTED` or `RELOCATING`) then the replicas must all be `UNASSIGNED` This commit adds assertions to enforce these invariants, and fixes up the tests which create instances that do not satisfy them.
1 parent 6a04403 commit bd0b090

File tree

19 files changed

+254
-306
lines changed

19 files changed

+254
-306
lines changed

server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterStateDiffIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ private IndexRoutingTable randomIndexRoutingTable(String index, String[] nodeIds
291291
nodeId,
292292
null,
293293
j == 0,
294-
ShardRoutingState.fromValue((byte) randomIntBetween(2, 3)),
294+
j == 0 ? ShardRoutingState.STARTED : randomFrom(ShardRoutingState.INITIALIZING, ShardRoutingState.STARTED),
295295
unassignedInfo
296296
)
297297
);

server/src/main/java/org/elasticsearch/cluster/routing/IndexShardRoutingTable.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ public IndexShardRoutingTable build() {
559559
// don't allow more than one shard copy with same id to be allocated to same node
560560
assert distinctNodes(shards) : "more than one shard with same id assigned to same node (shards: " + shards + ")";
561561
assert noDuplicatePrimary(shards) : "expected but did not find unique primary in shard routing table: " + shards;
562+
assert noAssignedReplicaWithoutActivePrimary(shards) : "unexpected assigned replica with no active primary: " + shards;
562563
return new IndexShardRoutingTable(shardId, shards);
563564
}
564565

@@ -589,9 +590,24 @@ static boolean noDuplicatePrimary(List<ShardRouting> shards) {
589590
seenPrimary = true;
590591
}
591592
}
592-
// We should be able to return seenPrimary here, but in tests there are many routing tables with no primary (e.g. empty) so for
593-
// now we leniently allow there to be no primary as well. TODO fix those tests and stop being lenient here.
594-
return true;
593+
return seenPrimary;
594+
}
595+
596+
static boolean noAssignedReplicaWithoutActivePrimary(List<ShardRouting> shards) {
597+
boolean seenAssignedReplica = false;
598+
for (final var shard : shards) {
599+
if (shard.currentNodeId() != null) {
600+
if (shard.primary()) {
601+
if (shard.active()) {
602+
return true;
603+
}
604+
} else {
605+
seenAssignedReplica = true;
606+
}
607+
}
608+
}
609+
610+
return seenAssignedReplica == false;
595611
}
596612

597613
public static IndexShardRoutingTable.Builder readFrom(StreamInput in) throws IOException {

server/src/main/java/org/elasticsearch/indices/store/IndicesStore.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ public void clusterChanged(ClusterChangedEvent event) {
184184
}
185185

186186
static boolean shardCanBeDeleted(String localNodeId, IndexShardRoutingTable indexShardRoutingTable) {
187+
assert indexShardRoutingTable.size() > 0;
188+
187189
// a shard can be deleted if all its copies are active, and its not allocated on this node
188190
if (indexShardRoutingTable.size() == 0) {
189191
// should not really happen, there should always be at least 1 (primary) shard in a

server/src/test/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthActionTests.java

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,21 @@ ClusterState randomClusterStateWithInitializingShards(String index, final int in
7272
.build();
7373

7474
final List<ShardRoutingState> shardRoutingStates = new ArrayList<>();
75-
IntStream.range(0, between(1, 30))
76-
.forEach(
77-
i -> shardRoutingStates.add(
78-
randomFrom(ShardRoutingState.STARTED, ShardRoutingState.UNASSIGNED, ShardRoutingState.RELOCATING)
79-
)
80-
);
81-
IntStream.range(0, initializingShards).forEach(i -> shardRoutingStates.add(ShardRoutingState.INITIALIZING));
82-
Randomness.shuffle(shardRoutingStates);
83-
84-
// primary can not be unassigned, otherwise replicas can't in initializing or relocating state.
85-
// (assertion in RoutingNodes disallows this)
86-
if (shardRoutingStates.get(0) == ShardRoutingState.UNASSIGNED) {
87-
// Don't randomly pick ShardRoutingState.UNASSIGNED, since that already has randomly been inserted based on initializingShards
88-
shardRoutingStates.set(0, randomFrom(ShardRoutingState.STARTED, ShardRoutingState.RELOCATING));
75+
if (initializingShards == 1 && randomBoolean()) {
76+
shardRoutingStates.add(ShardRoutingState.INITIALIZING);
77+
IntStream.range(0, between(0, 30)).forEach(i -> shardRoutingStates.add(ShardRoutingState.UNASSIGNED));
78+
} else {
79+
IntStream.range(0, between(0, 30))
80+
.forEach(
81+
i -> shardRoutingStates.add(
82+
randomFrom(ShardRoutingState.STARTED, ShardRoutingState.UNASSIGNED, ShardRoutingState.RELOCATING)
83+
)
84+
);
85+
IntStream.range(0, initializingShards).forEach(i -> shardRoutingStates.add(ShardRoutingState.INITIALIZING));
86+
Randomness.shuffle(shardRoutingStates);
87+
88+
// primary must be active, otherwise replicas can't in initializing or relocating state.
89+
shardRoutingStates.add(0, randomFrom(ShardRoutingState.STARTED, ShardRoutingState.RELOCATING));
8990
}
9091

9192
final ShardId shardId = new ShardId(indexMetadata.getIndex(), 0);

server/src/test/java/org/elasticsearch/cluster/action/shard/ShardStartedClusterStateTaskExecutorTests.java

Lines changed: 114 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.elasticsearch.index.shard.ShardId;
2727
import org.elasticsearch.index.shard.ShardLongFieldRange;
2828

29-
import java.util.ArrayList;
3029
import java.util.List;
3130
import java.util.stream.IntStream;
3231
import java.util.stream.Stream;
@@ -142,45 +141,57 @@ public void testNonInitializingShardAreMarkedAsSuccessful() throws Exception {
142141
assertSame(clusterState, executeTasks(clusterState, tasks));
143142
}
144143

145-
public void testStartedShards() throws Exception {
144+
public void testStartPrimary() throws Exception {
146145
final String indexName = "test";
147-
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING, ShardRoutingState.INITIALIZING);
146+
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING);
148147

149148
final IndexMetadata indexMetadata = clusterState.metadata().index(indexName);
150149
final ShardId shardId = new ShardId(indexMetadata.getIndex(), 0);
151150
final long primaryTerm = indexMetadata.primaryTerm(shardId.id());
152151
final ShardRouting primaryShard = clusterState.routingTable().shardRoutingTable(shardId).primaryShard();
153152
final String primaryAllocationId = primaryShard.allocationId().getId();
154153

155-
final List<StartedShardUpdateTask> tasks = new ArrayList<>();
156-
tasks.add(
157-
new StartedShardUpdateTask(
158-
new StartedShardEntry(shardId, primaryAllocationId, primaryTerm, "test", ShardLongFieldRange.UNKNOWN),
159-
createTestListener()
160-
)
154+
final var task = new StartedShardUpdateTask(
155+
new StartedShardEntry(shardId, primaryAllocationId, primaryTerm, "test", ShardLongFieldRange.UNKNOWN),
156+
createTestListener()
161157
);
162-
if (randomBoolean()) {
163-
final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().iterator().next();
164-
final String replicaAllocationId = replicaShard.allocationId().getId();
165-
tasks.add(
166-
new StartedShardUpdateTask(
167-
new StartedShardEntry(shardId, replicaAllocationId, primaryTerm, "test", ShardLongFieldRange.UNKNOWN),
168-
createTestListener()
169-
)
170-
);
171-
}
172158

173-
final var resultingState = executeTasks(clusterState, tasks);
159+
final var resultingState = executeTasks(clusterState, List.of(task));
174160
assertNotSame(clusterState, resultingState);
175-
for (final var task : tasks) {
176-
assertThat(
177-
resultingState.routingTable()
178-
.shardRoutingTable(task.getEntry().shardId)
179-
.getByAllocationId(task.getEntry().allocationId)
180-
.state(),
181-
is(ShardRoutingState.STARTED)
182-
);
183-
}
161+
assertThat(
162+
resultingState.routingTable()
163+
.shardRoutingTable(task.getEntry().shardId)
164+
.getByAllocationId(task.getEntry().allocationId)
165+
.state(),
166+
is(ShardRoutingState.STARTED)
167+
);
168+
}
169+
170+
public void testStartReplica() throws Exception {
171+
final String indexName = "test";
172+
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.STARTED, ShardRoutingState.INITIALIZING);
173+
174+
final IndexMetadata indexMetadata = clusterState.metadata().index(indexName);
175+
final ShardId shardId = new ShardId(indexMetadata.getIndex(), 0);
176+
final long primaryTerm = indexMetadata.primaryTerm(shardId.id());
177+
final ShardRouting primaryShard = clusterState.routingTable().shardRoutingTable(shardId).primaryShard();
178+
179+
final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().iterator().next();
180+
final String replicaAllocationId = replicaShard.allocationId().getId();
181+
final var task = new StartedShardUpdateTask(
182+
new StartedShardEntry(shardId, replicaAllocationId, primaryTerm, "test", ShardLongFieldRange.UNKNOWN),
183+
createTestListener()
184+
);
185+
186+
final var resultingState = executeTasks(clusterState, List.of(task));
187+
assertNotSame(clusterState, resultingState);
188+
assertThat(
189+
resultingState.routingTable()
190+
.shardRoutingTable(task.getEntry().shardId)
191+
.getByAllocationId(task.getEntry().allocationId)
192+
.state(),
193+
is(ShardRoutingState.STARTED)
194+
);
184195
}
185196

186197
public void testDuplicateStartsAreOkay() throws Exception {
@@ -215,12 +226,12 @@ public void testDuplicateStartsAreOkay() throws Exception {
215226
}
216227
}
217228

218-
public void testPrimaryTermsMismatch() throws Exception {
229+
public void testPrimaryTermsMismatchOnPrimary() throws Exception {
219230
final String indexName = "test";
220231
final int shard = 0;
221232
final int primaryTerm = 2 + randomInt(200);
222233

223-
ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING, ShardRoutingState.INITIALIZING);
234+
ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING);
224235
clusterState = ClusterState.builder(clusterState)
225236
.metadata(
226237
Metadata.builder(clusterState.metadata())
@@ -272,8 +283,23 @@ public void testPrimaryTermsMismatch() throws Exception {
272283
.state(),
273284
is(ShardRoutingState.STARTED)
274285
);
275-
clusterState = resultingState;
276286
}
287+
}
288+
289+
public void testPrimaryTermsMismatchOnReplica() throws Exception {
290+
final String indexName = "test";
291+
final int shard = 0;
292+
final int primaryTerm = 2 + randomInt(200);
293+
294+
ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.STARTED, ShardRoutingState.INITIALIZING);
295+
clusterState = ClusterState.builder(clusterState)
296+
.metadata(
297+
Metadata.builder(clusterState.metadata())
298+
.put(IndexMetadata.builder(clusterState.metadata().index(indexName)).primaryTerm(shard, primaryTerm).build(), true)
299+
.build()
300+
)
301+
.build();
302+
final ShardId shardId = new ShardId(clusterState.metadata().index(indexName).getIndex(), shard);
277303
{
278304
final long replicaPrimaryTerm = randomBoolean() ? primaryTerm : primaryTerm - 1;
279305
final String replicaAllocationId = clusterState.routingTable()
@@ -301,9 +327,9 @@ public void testPrimaryTermsMismatch() throws Exception {
301327
}
302328
}
303329

304-
public void testExpandsTimestampRange() throws Exception {
330+
public void testExpandsTimestampRangeForPrimary() throws Exception {
305331
final String indexName = "test";
306-
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING, ShardRoutingState.INITIALIZING);
332+
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.INITIALIZING);
307333

308334
final IndexMetadata indexMetadata = clusterState.metadata().index(indexName);
309335
final ShardId shardId = new ShardId(indexMetadata.getIndex(), 0);
@@ -317,47 +343,66 @@ public void testExpandsTimestampRange() throws Exception {
317343
: randomBoolean() ? ShardLongFieldRange.EMPTY
318344
: ShardLongFieldRange.of(1606407943000L, 1606407944000L);
319345

320-
final List<StartedShardUpdateTask> tasks = new ArrayList<>();
321-
tasks.add(
322-
new StartedShardUpdateTask(
323-
new StartedShardEntry(shardId, primaryAllocationId, primaryTerm, "test", shardTimestampRange),
324-
createTestListener()
325-
)
346+
final var task = new StartedShardUpdateTask(
347+
new StartedShardEntry(shardId, primaryAllocationId, primaryTerm, "test", shardTimestampRange),
348+
createTestListener()
326349
);
327-
if (randomBoolean()) {
328-
final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().iterator().next();
329-
final String replicaAllocationId = replicaShard.allocationId().getId();
330-
tasks.add(
331-
new StartedShardUpdateTask(
332-
new StartedShardEntry(shardId, replicaAllocationId, primaryTerm, "test", shardTimestampRange),
333-
createTestListener()
334-
)
335-
);
336-
}
337-
final var resultingState = executeTasks(clusterState, tasks);
350+
351+
final var resultingState = executeTasks(clusterState, List.of(task));
338352
assertNotSame(clusterState, resultingState);
339-
for (final var task : tasks) {
340-
assertThat(
341-
resultingState.routingTable()
342-
.shardRoutingTable(task.getEntry().shardId)
343-
.getByAllocationId(task.getEntry().allocationId)
344-
.state(),
345-
is(ShardRoutingState.STARTED)
346-
);
353+
assertThat(
354+
resultingState.routingTable()
355+
.shardRoutingTable(task.getEntry().shardId)
356+
.getByAllocationId(task.getEntry().allocationId)
357+
.state(),
358+
is(ShardRoutingState.STARTED)
359+
);
347360

348-
final var timestampRange = resultingState.metadata().index(indexName).getTimestampRange();
349-
if (shardTimestampRange == ShardLongFieldRange.UNKNOWN) {
350-
assertThat(timestampRange, sameInstance(IndexLongFieldRange.UNKNOWN));
351-
} else if (shardTimestampRange == ShardLongFieldRange.EMPTY) {
352-
assertThat(timestampRange, sameInstance(IndexLongFieldRange.EMPTY));
353-
} else {
354-
assertTrue(timestampRange.isComplete());
355-
assertThat(timestampRange.getMin(), equalTo(shardTimestampRange.getMin()));
356-
assertThat(timestampRange.getMax(), equalTo(shardTimestampRange.getMax()));
357-
}
361+
final var timestampRange = resultingState.metadata().index(indexName).getTimestampRange();
362+
if (shardTimestampRange == ShardLongFieldRange.UNKNOWN) {
363+
assertThat(timestampRange, sameInstance(IndexLongFieldRange.UNKNOWN));
364+
} else if (shardTimestampRange == ShardLongFieldRange.EMPTY) {
365+
assertThat(timestampRange, sameInstance(IndexLongFieldRange.EMPTY));
366+
} else {
367+
assertTrue(timestampRange.isComplete());
368+
assertThat(timestampRange.getMin(), equalTo(shardTimestampRange.getMin()));
369+
assertThat(timestampRange.getMax(), equalTo(shardTimestampRange.getMax()));
358370
}
359371
}
360372

373+
public void testExpandsTimestampRangeForReplica() throws Exception {
374+
final String indexName = "test";
375+
final ClusterState clusterState = state(indexName, randomBoolean(), ShardRoutingState.STARTED, ShardRoutingState.INITIALIZING);
376+
377+
final IndexMetadata indexMetadata = clusterState.metadata().index(indexName);
378+
final ShardId shardId = new ShardId(indexMetadata.getIndex(), 0);
379+
final long primaryTerm = indexMetadata.primaryTerm(shardId.id());
380+
381+
assertThat(indexMetadata.getTimestampRange(), sameInstance(IndexLongFieldRange.UNKNOWN));
382+
383+
final ShardLongFieldRange shardTimestampRange = randomBoolean() ? ShardLongFieldRange.UNKNOWN
384+
: randomBoolean() ? ShardLongFieldRange.EMPTY
385+
: ShardLongFieldRange.of(1606407943000L, 1606407944000L);
386+
387+
final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().iterator().next();
388+
final String replicaAllocationId = replicaShard.allocationId().getId();
389+
final var task = new StartedShardUpdateTask(
390+
new StartedShardEntry(shardId, replicaAllocationId, primaryTerm, "test", shardTimestampRange),
391+
createTestListener()
392+
);
393+
final var resultingState = executeTasks(clusterState, List.of(task));
394+
assertNotSame(clusterState, resultingState);
395+
assertThat(
396+
resultingState.routingTable()
397+
.shardRoutingTable(task.getEntry().shardId)
398+
.getByAllocationId(task.getEntry().allocationId)
399+
.state(),
400+
is(ShardRoutingState.STARTED)
401+
);
402+
403+
assertThat(resultingState.metadata().index(indexName).getTimestampRange(), sameInstance(IndexLongFieldRange.UNKNOWN));
404+
}
405+
361406
private ClusterState executeTasks(final ClusterState state, final List<StartedShardUpdateTask> tasks) throws Exception {
362407
return ClusterStateTaskExecutorUtils.executeAndAssertSuccessful(state, executor, tasks);
363408
}

0 commit comments

Comments
 (0)