|
10 | 10 | import org.apache.logging.log4j.Logger;
|
11 | 11 | import org.apache.logging.log4j.message.ParameterizedMessage;
|
12 | 12 | import org.apache.lucene.store.AlreadyClosedException;
|
13 |
| -import org.elasticsearch.Assertions; |
14 | 13 | import org.elasticsearch.ElasticsearchException;
|
15 | 14 | import org.elasticsearch.ExceptionsHelper;
|
16 | 15 | import org.elasticsearch.action.ActionListener;
|
|
23 | 22 | import org.elasticsearch.cluster.routing.ShardRouting;
|
24 | 23 | import org.elasticsearch.common.breaker.CircuitBreakingException;
|
25 | 24 | import org.elasticsearch.common.io.stream.StreamInput;
|
| 25 | +import org.elasticsearch.common.util.concurrent.AbstractRunnable; |
26 | 26 | import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
|
27 | 27 | import org.elasticsearch.core.Nullable;
|
28 | 28 | import org.elasticsearch.core.TimeValue;
|
@@ -317,26 +317,55 @@ private void updateCheckPoints(ShardRouting shard, LongSupplier localCheckpointS
|
317 | 317 | private void onNoLongerPrimary(Exception failure) {
|
318 | 318 | final Throwable cause = ExceptionsHelper.unwrapCause(failure);
|
319 | 319 | final boolean nodeIsClosing = cause instanceof NodeClosedException;
|
320 |
| - final String message; |
321 | 320 | if (nodeIsClosing) {
|
322 |
| - message = String.format( |
323 |
| - Locale.ROOT, |
324 |
| - "node with primary [%s] is shutting down while failing replica shard", |
325 |
| - primary.routingEntry() |
326 |
| - ); |
327 | 321 | // We prefer not to fail the primary to avoid unnecessary warning log
|
328 | 322 | // when the node with the primary shard is gracefully shutting down.
|
| 323 | + finishAsFailed( |
| 324 | + new RetryOnPrimaryException( |
| 325 | + primary.routingEntry().shardId(), |
| 326 | + String.format( |
| 327 | + Locale.ROOT, |
| 328 | + "node with primary [%s] is shutting down while failing replica shard", |
| 329 | + primary.routingEntry() |
| 330 | + ), |
| 331 | + failure |
| 332 | + ) |
| 333 | + ); |
329 | 334 | } else {
|
330 |
| - if (Assertions.ENABLED) { |
331 |
| - if (failure instanceof ShardStateAction.NoLongerPrimaryShardException == false) { |
332 |
| - throw new AssertionError("unexpected failure", failure); |
| 335 | + assert failure instanceof ShardStateAction.NoLongerPrimaryShardException : failure; |
| 336 | + threadPool.executor(ThreadPool.Names.WRITE).execute(new AbstractRunnable() { |
| 337 | + @Override |
| 338 | + protected void doRun() { |
| 339 | + // we are no longer the primary, fail ourselves and start over |
| 340 | + final var message = String.format( |
| 341 | + Locale.ROOT, |
| 342 | + "primary shard [%s] was demoted while failing replica shard", |
| 343 | + primary.routingEntry() |
| 344 | + ); |
| 345 | + primary.failShard(message, failure); |
| 346 | + finishAsFailed(new RetryOnPrimaryException(primary.routingEntry().shardId(), message, failure)); |
333 | 347 | }
|
334 |
| - } |
335 |
| - // we are no longer the primary, fail ourselves and start over |
336 |
| - message = String.format(Locale.ROOT, "primary shard [%s] was demoted while failing replica shard", primary.routingEntry()); |
337 |
| - primary.failShard(message, failure); |
| 348 | + |
| 349 | + @Override |
| 350 | + public boolean isForceExecution() { |
| 351 | + return true; |
| 352 | + } |
| 353 | + |
| 354 | + @Override |
| 355 | + public void onFailure(Exception e) { |
| 356 | + e.addSuppressed(failure); |
| 357 | + assert false : e; |
| 358 | + logger.error(new ParameterizedMessage("unexpected failure while failing primary [{}]", primary.routingEntry()), e); |
| 359 | + finishAsFailed( |
| 360 | + new RetryOnPrimaryException( |
| 361 | + primary.routingEntry().shardId(), |
| 362 | + String.format(Locale.ROOT, "unexpected failure while failing primary [%s]", primary.routingEntry()), |
| 363 | + e |
| 364 | + ) |
| 365 | + ); |
| 366 | + } |
| 367 | + }); |
338 | 368 | }
|
339 |
| - finishAsFailed(new RetryOnPrimaryException(primary.routingEntry().shardId(), message, failure)); |
340 | 369 | }
|
341 | 370 |
|
342 | 371 | /**
|
|
0 commit comments