Skip to content

Commit bed02e2

Browse files
Change FailedToCommitClusterStateException to NotMasterException (#135008)
Modifies the `Batch.onRejection` method to accept a `NotMasterException` rather than a `FailedToCommitClusterStateException`. The `NotMasterException` is thrown because the master node has closed. However, at this point we haven't even tried to commit the cluster state so we know for a fact that it's failed, so a `FailedToCommitClusterStateException`, which implies ambiguity in whether the cluster state update was committed or not, is wrong here.
1 parent 0592415 commit bed02e2

File tree

3 files changed

+14
-11
lines changed

3 files changed

+14
-11
lines changed

server/src/main/java/org/elasticsearch/cluster/NotMasterException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ public NotMasterException(StreamInput in) throws IOException {
3434
super(in);
3535
}
3636

37+
public NotMasterException(String msg, Throwable cause, Object... args) {
38+
super(msg, cause, args);
39+
}
40+
3741
@Override
3842
public Throwable fillInStackTrace() {
3943
return this;

server/src/main/java/org/elasticsearch/cluster/service/MasterService.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ public String toString() {
12831283
if (lifecycle.started()) {
12841284
nextBatch.run(batchCompletionListener);
12851285
} else {
1286-
nextBatch.onRejection(new FailedToCommitClusterStateException("node closed", getRejectionException()));
1286+
nextBatch.onRejection(new NotMasterException("node closed", getRejectionException()));
12871287
batchCompletionListener.onResponse(null);
12881288
}
12891289
});
@@ -1309,7 +1309,7 @@ private void onCompletion() {
13091309
@Override
13101310
public void onRejection(Exception e) {
13111311
assert e instanceof EsRejectedExecutionException esre && esre.isExecutorShutdown() : e;
1312-
drainQueueOnRejection(new FailedToCommitClusterStateException("node closed", e));
1312+
drainQueueOnRejection(new NotMasterException("node closed", e));
13131313
}
13141314

13151315
@Override
@@ -1336,7 +1336,7 @@ private Batch takeNextBatch() {
13361336
private void forkQueueProcessor() {
13371337
// single-threaded: started when totalQueueSize transitions from 0 to 1 and keeps calling itself until the queue is drained.
13381338
if (lifecycle.started() == false) {
1339-
drainQueueOnRejection(new FailedToCommitClusterStateException("node closed", getRejectionException()));
1339+
drainQueueOnRejection(new NotMasterException("node closed", getRejectionException()));
13401340
return;
13411341
}
13421342

@@ -1353,7 +1353,7 @@ private EsRejectedExecutionException getRejectionException() {
13531353
return new EsRejectedExecutionException("master service is in state [" + lifecycleState() + "]", true);
13541354
}
13551355

1356-
private void drainQueueOnRejection(FailedToCommitClusterStateException e) {
1356+
private void drainQueueOnRejection(NotMasterException e) {
13571357
assert totalQueueSize.get() > 0;
13581358
do {
13591359
assert currentlyExecutingBatch == null;
@@ -1407,12 +1407,11 @@ private interface Batch {
14071407
/**
14081408
* Called when the batch is rejected due to the master service shutting down.
14091409
*
1410-
* @param e is a {@link FailedToCommitClusterStateException} to cause things like {@link TransportMasterNodeAction} to retry after
1410+
* @param e is a {@link NotMasterException} to cause things like {@link TransportMasterNodeAction} to retry after
14111411
* submitting a task to a master which shut down. {@code e.getCause()} is the rejection exception, which should be a
14121412
* {@link EsRejectedExecutionException} with {@link EsRejectedExecutionException#isExecutorShutdown()} true.
14131413
*/
1414-
// Should really be a NodeClosedException instead, but this exception type doesn't trigger retries today.
1415-
void onRejection(FailedToCommitClusterStateException e);
1414+
void onRejection(NotMasterException e);
14161415

14171416
/**
14181417
* @return number of tasks in this batch if the batch is pending, or {@code 0} if the batch is not pending.
@@ -1634,7 +1633,7 @@ T acquireForExecution() {
16341633
return task;
16351634
}
16361635

1637-
void onRejection(FailedToCommitClusterStateException e) {
1636+
void onRejection(NotMasterException e) {
16381637
final var task = acquireForExecution();
16391638
if (task != null) {
16401639
try (var ignored = storedContextSupplier.get()) {
@@ -1654,7 +1653,7 @@ boolean isPending() {
16541653

16551654
private class Processor implements Batch {
16561655
@Override
1657-
public void onRejection(FailedToCommitClusterStateException e) {
1656+
public void onRejection(NotMasterException e) {
16581657
final var items = queueSize.getAndSet(0);
16591658
for (int i = 0; i < items; i++) {
16601659
final var entry = queue.poll();

server/src/test/java/org/elasticsearch/cluster/service/MasterServiceTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,7 +2182,7 @@ class TestTask implements ClusterStateTaskListener {
21822182
@Override
21832183
public void onFailure(Exception e) {
21842184
assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader));
2185-
if ((e instanceof FailedToCommitClusterStateException
2185+
if ((e instanceof NotMasterException
21862186
&& e.getCause() instanceof EsRejectedExecutionException esre
21872187
&& esre.isExecutorShutdown()) == false) {
21882188
throw new AssertionError("unexpected exception", e);
@@ -2361,7 +2361,7 @@ class TestTask implements ClusterStateTaskListener {
23612361
@Override
23622362
public void onFailure(Exception e) {
23632363
assertEquals(expectedHeader, threadPool.getThreadContext().getHeader(testHeader));
2364-
if ((e instanceof FailedToCommitClusterStateException
2364+
if ((e instanceof NotMasterException
23652365
&& e.getCause() instanceof EsRejectedExecutionException esre
23662366
&& esre.isExecutorShutdown()) == false) {
23672367
throw new AssertionError("unexpected exception", e);

0 commit comments

Comments
 (0)