Skip to content

Commit a14ad5f

Browse files
committed
Finalize all snapshots completed by shard snapshot updates (#105245)
Today when processing a batch of `ShardSnapshotUpdate` tasks each update's listener considers whether the corresponding snapshot has completed and, if so, it enqueues it for finalization. This is somewhat inefficient since we may be processing many shard snapshot updates for the same few snapshots, but there's no need to check each snapshot for completion more than once. It's also insufficient since the completion of a shard snapshot may cause the completion of subsequent snapshots too (e.g. they can go from state `QUEUED` straight to `MISSING`). This commit detaches the completion handling from the individual shard snapshot updates and instead makes sure that any snapshot that reaches a completed state is enqueued for finalization. Closes #104939
1 parent c5bbd4c commit a14ad5f

File tree

4 files changed

+267
-44
lines changed

4 files changed

+267
-44
lines changed

docs/changelog/105245.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 105245
2+
summary: Finalize all snapshots completed by shard snapshot updates
3+
area: Snapshot/Restore
4+
type: bug
5+
issues:
6+
- 104939

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.action.support.ActionFilters;
2424
import org.elasticsearch.action.support.ContextPreservingActionListener;
2525
import org.elasticsearch.action.support.GroupedActionListener;
26+
import org.elasticsearch.action.support.RefCountingRunnable;
2627
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
2728
import org.elasticsearch.cluster.ClusterChangedEvent;
2829
import org.elasticsearch.cluster.ClusterState;
@@ -70,7 +71,6 @@
7071
import org.elasticsearch.common.util.Maps;
7172
import org.elasticsearch.common.util.concurrent.EsExecutors;
7273
import org.elasticsearch.common.util.concurrent.ListenableFuture;
73-
import org.elasticsearch.common.util.concurrent.RunOnce;
7474
import org.elasticsearch.core.Nullable;
7575
import org.elasticsearch.core.SuppressForbidden;
7676
import org.elasticsearch.core.Tuple;
@@ -188,6 +188,8 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement
188188

189189
private final MasterServiceTaskQueue<SnapshotTask> masterServiceTaskQueue;
190190

191+
private final ShardSnapshotUpdateCompletionHandler shardSnapshotUpdateCompletionHandler;
192+
191193
/**
192194
* Setting that specifies the maximum number of allowed concurrent snapshot create and delete operations in the
193195
* cluster state. The number of concurrent operations in a cluster state is defined as the sum of
@@ -238,6 +240,7 @@ public SnapshotsService(
238240
this.systemIndices = systemIndices;
239241

240242
this.masterServiceTaskQueue = clusterService.createTaskQueue("snapshots-service", Priority.NORMAL, new SnapshotTaskExecutor());
243+
this.shardSnapshotUpdateCompletionHandler = this::handleShardSnapshotUpdateCompletion;
241244
}
242245

243246
/**
@@ -3084,16 +3087,19 @@ static final class SnapshotShardsUpdateContext {
30843087
// updates that were used to update an existing in-progress shard snapshot
30853088
private final Set<ShardSnapshotUpdate> executedUpdates = new HashSet<>();
30863089

3087-
// enqueues a reroute because some shard snapshots finished
3088-
private final Runnable rerouteRunnable;
3090+
// handles the completion of some shard-snapshot updates, performing the next possible actions
3091+
private final ShardSnapshotUpdateCompletionHandler completionHandler;
3092+
3093+
// entries that became complete due to this batch of updates
3094+
private final List<SnapshotsInProgress.Entry> newlyCompletedEntries = new ArrayList<>();
30893095

30903096
SnapshotShardsUpdateContext(
30913097
ClusterStateTaskExecutor.BatchExecutionContext<SnapshotTask> batchExecutionContext,
3092-
Runnable rerouteRunnable
3098+
ShardSnapshotUpdateCompletionHandler completionHandler
30933099
) {
30943100
this.batchExecutionContext = batchExecutionContext;
30953101
this.initialState = batchExecutionContext.initialState();
3096-
this.rerouteRunnable = new RunOnce(rerouteRunnable); // RunOnce to avoid enqueueing O(#shards) listeners
3102+
this.completionHandler = completionHandler;
30973103
this.updatesByRepo = new HashMap<>();
30983104
for (final var taskContext : batchExecutionContext.taskContexts()) {
30993105
if (taskContext.getTask() instanceof ShardSnapshotUpdate task) {
@@ -3113,7 +3119,11 @@ SnapshotsInProgress computeUpdatedState() {
31133119
}
31143120
final List<SnapshotsInProgress.Entry> newEntries = new ArrayList<>(oldEntries.size());
31153121
for (SnapshotsInProgress.Entry entry : oldEntries) {
3116-
newEntries.add(applyToEntry(entry, updates.getValue()));
3122+
final var newEntry = applyToEntry(entry, updates.getValue());
3123+
newEntries.add(newEntry);
3124+
if (newEntry != entry && newEntry.state().completed()) {
3125+
newlyCompletedEntries.add(newEntry);
3126+
}
31173127
}
31183128
updated = updated.withUpdatedEntriesForRepo(repoName, newEntries);
31193129
}
@@ -3132,12 +3142,20 @@ SnapshotsInProgress computeUpdatedState() {
31323142
void completeWithUpdatedState(SnapshotsInProgress snapshotsInProgress) {
31333143
if (updatesByRepo.isEmpty() == false) {
31343144
final var result = new ShardSnapshotUpdateResult(initialState.metadata(), snapshotsInProgress);
3135-
for (final var taskContext : batchExecutionContext.taskContexts()) {
3136-
if (taskContext.getTask() instanceof ShardSnapshotUpdate task) {
3137-
taskContext.success(() -> {
3138-
rerouteRunnable.run();
3139-
task.listener.onResponse(result);
3140-
});
3145+
try (
3146+
var onCompletionRefs = new RefCountingRunnable(
3147+
() -> completionHandler.handleCompletion(result, newlyCompletedEntries, updatesByRepo.keySet())
3148+
)
3149+
) {
3150+
for (final var taskContext : batchExecutionContext.taskContexts()) {
3151+
if (taskContext.getTask() instanceof ShardSnapshotUpdate task) {
3152+
final var ref = onCompletionRefs.acquire();
3153+
taskContext.success(() -> {
3154+
try (ref) {
3155+
task.listener.onResponse(result);
3156+
}
3157+
});
3158+
}
31413159
}
31423160
}
31433161
}
@@ -3376,6 +3394,37 @@ private ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shardsBuilder() {
33763394
*/
33773395
record ShardSnapshotUpdateResult(Metadata metadata, SnapshotsInProgress snapshotsInProgress) {}
33783396

3397+
interface ShardSnapshotUpdateCompletionHandler {
3398+
void handleCompletion(
3399+
ShardSnapshotUpdateResult shardSnapshotUpdateResult,
3400+
List<SnapshotsInProgress.Entry> newlyCompletedEntries,
3401+
Set<String> updatedRepositories
3402+
);
3403+
}
3404+
3405+
private void handleShardSnapshotUpdateCompletion(
3406+
ShardSnapshotUpdateResult shardSnapshotUpdateResult,
3407+
List<SnapshotsInProgress.Entry> newlyCompletedEntries,
3408+
Set<String> updatedRepositories
3409+
) {
3410+
// Maybe this state update completed one or more snapshots. If we are not already ending them because of some earlier update, end
3411+
// them now.
3412+
final var snapshotsInProgress = shardSnapshotUpdateResult.snapshotsInProgress();
3413+
for (final var newlyCompletedEntry : newlyCompletedEntries) {
3414+
if (endingSnapshots.contains(newlyCompletedEntry.snapshot()) == false) {
3415+
endSnapshot(newlyCompletedEntry, shardSnapshotUpdateResult.metadata, null);
3416+
}
3417+
}
3418+
// Likewise this state update may enable some new shard clones on any affected repository, so check them all.
3419+
for (final var updatedRepository : updatedRepositories) {
3420+
startExecutableClones(snapshotsInProgress, updatedRepository);
3421+
}
3422+
// Also shard snapshot completions may free up some shards to move to other nodes, so we must trigger a reroute.
3423+
if (updatedRepositories.isEmpty() == false) {
3424+
rerouteService.reroute("after shards snapshot update", Priority.NORMAL, ActionListener.noop());
3425+
}
3426+
}
3427+
33793428
/**
33803429
* An update to the snapshot state of a shard.
33813430
*
@@ -3455,23 +3504,13 @@ private void innerUpdateSnapshotState(
34553504
ShardSnapshotStatus updatedState,
34563505
ActionListener<Void> listener
34573506
) {
3458-
var update = new ShardSnapshotUpdate(snapshot, shardId, repoShardId, updatedState, listener.delegateFailure((delegate, result) -> {
3459-
try {
3460-
delegate.onResponse(null);
3461-
} finally {
3462-
// Maybe this state update completed the snapshot. If we are not already ending it because of a concurrent
3463-
// state update we check if its state is completed and end it if it is.
3464-
final SnapshotsInProgress snapshotsInProgress = result.snapshotsInProgress();
3465-
if (endingSnapshots.contains(snapshot) == false) {
3466-
final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(snapshot);
3467-
// If the entry is still in the cluster state and is completed, try finalizing the snapshot in the repo
3468-
if (updatedEntry != null && updatedEntry.state().completed()) {
3469-
endSnapshot(updatedEntry, result.metadata(), null);
3470-
}
3471-
}
3472-
startExecutableClones(snapshotsInProgress, snapshot.getRepository());
3473-
}
3474-
}));
3507+
var update = new ShardSnapshotUpdate(
3508+
snapshot,
3509+
shardId,
3510+
repoShardId,
3511+
updatedState,
3512+
listener.delegateFailure((delegate, result) -> delegate.onResponse(null))
3513+
);
34753514
logger.trace("received updated snapshot restore state [{}]", update);
34763515
masterServiceTaskQueue.submitTask("update snapshot state", update, null);
34773516
}
@@ -3751,7 +3790,7 @@ public ClusterState execute(BatchExecutionContext<SnapshotTask> batchExecutionCo
37513790
final ClusterState state = batchExecutionContext.initialState();
37523791
final SnapshotShardsUpdateContext shardsUpdateContext = new SnapshotShardsUpdateContext(
37533792
batchExecutionContext,
3754-
() -> rerouteService.reroute("after shards snapshot update", Priority.NORMAL, ActionListener.noop())
3793+
shardSnapshotUpdateCompletionHandler
37553794
);
37563795
final SnapshotsInProgress initialSnapshots = SnapshotsInProgress.get(state);
37573796
SnapshotsInProgress snapshotsInProgress = shardsUpdateContext.computeUpdatedState();

0 commit comments

Comments
 (0)