Skip to content

Commit e559f3c

Browse files
committed
CCR should retry on CircuitBreakingException (#62013)
CCR shard follow task can hit CircuitBreakingException on the leader cluster (read changes requests) or the follower cluster (bulk requests). CCR should retry on CircuitBreakingException as it's a transient error.
1 parent d15dad7 commit e559f3c

File tree

2 files changed

+9
-2
lines changed

2 files changed

+9
-2
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTask.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.action.UnavailableShardsException;
1919
import org.elasticsearch.cluster.block.ClusterBlockException;
2020
import org.elasticsearch.common.Randomness;
21+
import org.elasticsearch.common.breaker.CircuitBreakingException;
2122
import org.elasticsearch.common.collect.Tuple;
2223
import org.elasticsearch.common.transport.NetworkExceptionHelper;
2324
import org.elasticsearch.common.unit.TimeValue;
@@ -524,7 +525,8 @@ static boolean shouldRetry(final Exception e) {
524525
actual instanceof NodeClosedException ||
525526
actual instanceof NoSuchRemoteClusterException ||
526527
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed")) ||
527-
actual instanceof EsRejectedExecutionException;
528+
actual instanceof EsRejectedExecutionException ||
529+
actual instanceof CircuitBreakingException;
528530
}
529531

530532
// These methods are protected for testing purposes:

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTaskRandomTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import org.elasticsearch.action.UnavailableShardsException;
99
import org.elasticsearch.common.UUIDs;
10+
import org.elasticsearch.common.breaker.CircuitBreakingException;
1011
import org.elasticsearch.common.unit.ByteSizeUnit;
1112
import org.elasticsearch.common.unit.ByteSizeValue;
1213
import org.elasticsearch.common.unit.TimeValue;
14+
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
1315
import org.elasticsearch.index.seqno.LocalCheckpointTracker;
1416
import org.elasticsearch.index.shard.ShardId;
1517
import org.elasticsearch.index.translog.Translog;
@@ -285,7 +287,10 @@ private static TestRun createTestRun(long startSeqNo, long startMappingVersion,
285287
List<TestResponse> item = new ArrayList<>();
286288
// Sometimes add a random retryable error
287289
if (sometimes()) {
288-
Exception error = new UnavailableShardsException(new ShardId("test", "test", 0), "");
290+
Exception error = randomFrom(
291+
new UnavailableShardsException(new ShardId("test", "test", 0), ""),
292+
new CircuitBreakingException("test", randomInt(), randomInt()),
293+
new EsRejectedExecutionException("test"));
289294
item.add(new TestResponse(error, mappingVersion, settingsVersion, null));
290295
}
291296
// Sometimes add an empty shard changes response to also simulate a leader shard lagging behind

0 commit comments

Comments
 (0)