|
49 | 49 | import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; |
50 | 50 | import static org.hamcrest.Matchers.empty; |
51 | 51 | import static org.hamcrest.Matchers.is; |
| 52 | +import static org.junit.Assert.assertTrue; |
52 | 53 |
|
53 | 54 | public class SnapshotsServiceTests extends ESTestCase { |
54 | 55 |
|
@@ -467,6 +468,164 @@ public void testPauseForNodeRemovalWithQueuedShards() throws Exception { |
467 | 468 | ); |
468 | 469 | } |
469 | 470 |
|
| 471 | + private SnapshotsInProgress.ShardSnapshotStatus successShardSnapshotStatus( |
| 472 | + String nodeId, |
| 473 | + ShardId shardId, |
| 474 | + SnapshotsInProgress.Entry entry |
| 475 | + ) { |
| 476 | + return SnapshotsInProgress.ShardSnapshotStatus.success( |
| 477 | + nodeId, |
| 478 | + new ShardSnapshotResult(entry.shards().get(shardId).generation(), ByteSizeValue.ofBytes(1L), 1) |
| 479 | + ); |
| 480 | + } |
| 481 | + |
| 482 | + private SnapshotsInProgress.ShardSnapshotStatus failedShardSnapshotStatus( |
| 483 | + String nodeId, |
| 484 | + ShardId shardId, |
| 485 | + SnapshotsInProgress.Entry entry |
| 486 | + ) { |
| 487 | + return new SnapshotsInProgress.ShardSnapshotStatus( |
| 488 | + nodeId, |
| 489 | + SnapshotsInProgress.ShardState.FAILED, |
| 490 | + entry.shards().get(shardId).generation(), |
| 491 | + "test injected failure" |
| 492 | + ); |
| 493 | + } |
| 494 | + |
| 495 | + private SnapshotsInProgress.ShardSnapshotStatus pausedShardSnapshotStatus( |
| 496 | + String nodeId, |
| 497 | + ShardId shardId, |
| 498 | + SnapshotsInProgress.Entry entry |
| 499 | + ) { |
| 500 | + return new SnapshotsInProgress.ShardSnapshotStatus( |
| 501 | + nodeId, |
| 502 | + SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL, |
| 503 | + entry.shards().get(shardId).generation() |
| 504 | + ); |
| 505 | + } |
| 506 | + |
| 507 | + /** |
| 508 | + * Tests that, within the same cluster state batched update execution, a shard snapshot status update of PAUSED_FOR_NODE_REMOVAL will be |
| 509 | + * ignored after the same shard snapshot has already been updated to a completed state. On the other hand, a PAUSED_FOR_NODE_REMOVAL |
| 510 | + * update followed by a SUCCESS, or other completed state, update should be applied and result in that completed state. |
| 511 | + */ |
| 512 | + public void testBatchedShardSnapshotUpdatesCannotApplyPausedAfterCompleted() throws Exception { |
| 513 | + final var repoName = "test-repo-name"; |
| 514 | + final var snapshot1 = snapshot(repoName, "test-snap-1"); |
| 515 | + final var snapshot2 = snapshot(repoName, "test-snap-2"); |
| 516 | + final var indexName = "test-index-name"; |
| 517 | + final var shardId = new ShardId(index(indexName), 0); |
| 518 | + final var repositoryShardId = new RepositoryShardId(indexId(indexName), 0); |
| 519 | + final var originalNodeId = uuid(); |
| 520 | + final var otherNodeId = uuid(); |
| 521 | + |
| 522 | + final SnapshotsInProgress.Entry runningSnapshotEntry = snapshotEntry( |
| 523 | + snapshot1, |
| 524 | + Collections.singletonMap(indexName, repositoryShardId.index()), |
| 525 | + Map.of(shardId, initShardStatus(originalNodeId)) |
| 526 | + ); |
| 527 | + |
| 528 | + final SnapshotsInProgress.Entry queuedSnapshotEntry = snapshotEntry( |
| 529 | + snapshot2, |
| 530 | + Collections.singletonMap(indexName, repositoryShardId.index()), |
| 531 | + Map.of(shardId, SnapshotsInProgress.ShardSnapshotStatus.UNASSIGNED_QUEUED) |
| 532 | + ); |
| 533 | + |
| 534 | + final ClusterState initialState = stateWithSnapshots( |
| 535 | + ClusterState.builder(ClusterState.EMPTY_STATE) |
| 536 | + .nodes( |
| 537 | + DiscoveryNodes.builder() |
| 538 | + .add(DiscoveryNodeUtils.create(originalNodeId)) |
| 539 | + .localNodeId(originalNodeId) |
| 540 | + .masterNodeId(originalNodeId) |
| 541 | + .build() |
| 542 | + ) |
| 543 | + .routingTable( |
| 544 | + RoutingTable.builder() |
| 545 | + .add( |
| 546 | + IndexRoutingTable.builder(shardId.getIndex()) |
| 547 | + .addShard(TestShardRouting.newShardRouting(shardId, originalNodeId, true, ShardRoutingState.STARTED)) |
| 548 | + ) |
| 549 | + .build() |
| 550 | + ) |
| 551 | + .build(), |
| 552 | + repoName, |
| 553 | + runningSnapshotEntry, |
| 554 | + queuedSnapshotEntry |
| 555 | + ); |
| 556 | + |
| 557 | + assertEquals( |
| 558 | + SnapshotsInProgress.ShardState.QUEUED, |
| 559 | + SnapshotsInProgress.get(initialState).snapshot(snapshot2).shards().get(shardId).state() |
| 560 | + ); |
| 561 | + |
| 562 | + /** |
| 563 | + * In this scenario, {@code originalNodeId} is the original shard owner that resends PAUSED, and {@code otherNodeId} is the new |
| 564 | + * shard owner that completes the shard snapshot. The production code doesn't verify node ownership, but it's helpful for the test. |
| 565 | + */ |
| 566 | + |
| 567 | + // Ultimately ignored statuses. |
| 568 | + var pausedOnOriginalNodeStatus = pausedShardSnapshotStatus(originalNodeId, shardId, runningSnapshotEntry); |
| 569 | + var successfulOnOriginalNodeStatus = successShardSnapshotStatus(originalNodeId, shardId, runningSnapshotEntry); |
| 570 | + |
| 571 | + // Ultimately applied statuses. |
| 572 | + var successfulOnOtherNodeStatus = successShardSnapshotStatus(otherNodeId, shardId, runningSnapshotEntry); |
| 573 | + var failedOnOtherNodeStatus = failedShardSnapshotStatus(otherNodeId, shardId, runningSnapshotEntry); |
| 574 | + |
| 575 | + var completedUpdateOnOtherNode = new SnapshotsService.ShardSnapshotUpdate( |
| 576 | + snapshot1, |
| 577 | + shardId, |
| 578 | + null, |
| 579 | + // Success and failure are both completed shard snapshot states, so paused should be ignored when either is set. |
| 580 | + randomBoolean() ? successfulOnOtherNodeStatus : failedOnOtherNodeStatus, |
| 581 | + ActionTestUtils.assertNoFailureListener(t -> {}) |
| 582 | + ); |
| 583 | + var pausedUpdateOnOriginalNode = new SnapshotsService.ShardSnapshotUpdate( |
| 584 | + snapshot1, |
| 585 | + shardId, |
| 586 | + null, |
| 587 | + pausedOnOriginalNodeStatus, |
| 588 | + ActionTestUtils.assertNoFailureListener(t -> {}) |
| 589 | + ); |
| 590 | + var completedUpdateOnOriginalNode = new SnapshotsService.ShardSnapshotUpdate( |
| 591 | + snapshot1, |
| 592 | + shardId, |
| 593 | + null, |
| 594 | + successfulOnOriginalNodeStatus, |
| 595 | + ActionTestUtils.assertNoFailureListener(t -> {}) |
| 596 | + ); |
| 597 | + |
| 598 | + boolean random = randomBoolean(); |
| 599 | + ClusterState updatedState; |
| 600 | + if (randomBoolean()) { |
| 601 | + updatedState = applyUpdates( |
| 602 | + initialState, |
| 603 | + // Randomize the order of completed and paused updates but make sure that there's one of each. If the paused update comes |
| 604 | + // after the completed update, paused should be ignored and the shard snapshot remains in a completed state. |
| 605 | + random ? completedUpdateOnOtherNode : pausedUpdateOnOriginalNode, |
| 606 | + random ? pausedUpdateOnOriginalNode : completedUpdateOnOtherNode |
| 607 | + ); |
| 608 | + } else { |
| 609 | + updatedState = applyUpdates( |
| 610 | + initialState, |
| 611 | + random ? completedUpdateOnOtherNode : pausedUpdateOnOriginalNode, |
| 612 | + random ? pausedUpdateOnOriginalNode : completedUpdateOnOtherNode, |
| 613 | + // Randomly add another update that will be ignored because the shard snapshot is complete. |
| 614 | + // Note: the originalNodeId is used for this update, so we can verify afterward that the update is not applied. |
| 615 | + randomBoolean() ? completedUpdateOnOriginalNode : pausedUpdateOnOriginalNode |
| 616 | + ); |
| 617 | + } |
| 618 | + |
| 619 | + assertTrue(SnapshotsInProgress.get(updatedState).snapshot(snapshot1).shards().get(shardId).state().completed()); |
| 620 | + assertEquals(otherNodeId, SnapshotsInProgress.get(updatedState).snapshot(snapshot1).shards().get(shardId).nodeId()); |
| 621 | + |
| 622 | + // Since the first snapshot completed, the second snapshot should be set to proceed with snapshotting the same shard. |
| 623 | + assertEquals( |
| 624 | + SnapshotsInProgress.ShardState.INIT, |
| 625 | + SnapshotsInProgress.get(updatedState).snapshot(snapshot2).shards().get(shardId).state() |
| 626 | + ); |
| 627 | + } |
| 628 | + |
470 | 629 | public void testSnapshottingIndicesExcludesClones() { |
471 | 630 | final String repoName = "test-repo"; |
472 | 631 | final String indexName = "index"; |
@@ -570,10 +729,21 @@ private static void assertIsNoop(ClusterState state, SnapshotsService.SnapshotTa |
570 | 729 | assertSame(applyUpdates(state, shardCompletion), state); |
571 | 730 | } |
572 | 731 |
|
| 732 | + /** |
| 733 | + * Runs the shard snapshot updates through a ClusterStateTaskExecutor that executes the |
| 734 | + * {@link SnapshotsService.SnapshotShardsUpdateContext}. |
| 735 | + * |
| 736 | + * @param state Original cluster state |
| 737 | + * @param updates List of SnapshotTask tasks to apply to the cluster state |
| 738 | + * @return An updated cluster state, or, if no changes were made, the original given cluster state. |
| 739 | + */ |
573 | 740 | private static ClusterState applyUpdates(ClusterState state, SnapshotsService.SnapshotTask... updates) throws Exception { |
574 | 741 | return ClusterStateTaskExecutorUtils.executeAndAssertSuccessful(state, batchExecutionContext -> { |
575 | 742 | final SnapshotsInProgress existing = SnapshotsInProgress.get(batchExecutionContext.initialState()); |
576 | | - final var context = new SnapshotsService.SnapshotShardsUpdateContext(batchExecutionContext, (a, b, c) -> {}); |
| 743 | + final var context = new SnapshotsService.SnapshotShardsUpdateContext( |
| 744 | + batchExecutionContext, |
| 745 | + /* on completion handler */ (shardSnapshotUpdateResult, newlyCompletedEntries, updatedRepositories) -> {} |
| 746 | + ); |
577 | 747 | final SnapshotsInProgress updated = context.computeUpdatedState(); |
578 | 748 | context.completeWithUpdatedState(updated); |
579 | 749 | if (existing == updated) { |
@@ -617,6 +787,9 @@ private static SnapshotsInProgress.Entry cloneEntry( |
617 | 787 | .withClones(clones); |
618 | 788 | } |
619 | 789 |
|
| 790 | + /** |
| 791 | + * Helper method to create a shard snapshot status with state {@link SnapshotsInProgress.ShardState#INIT}. |
| 792 | + */ |
620 | 793 | private static SnapshotsInProgress.ShardSnapshotStatus initShardStatus(String nodeId) { |
621 | 794 | return new SnapshotsInProgress.ShardSnapshotStatus(nodeId, ShardGeneration.newGeneration(random())); |
622 | 795 | } |
|
0 commit comments