Commit 6481e8a
Fix deleting index during snapshot finalization (#104380)
* Fix deleting index during snapshot finalization

Today if an index is deleted during a very specific order of snapshot finalizations then it's possible we'll miscalculate the latest shard generations for the shards in that index, causing the deletion of a shard-level `index-UUID` blob which prevents further snapshots of that shard.

Backports #103817 to 7.17

Closes #101029

* Test fixup
1 parent aba4da4 commit 6481e8a

File tree

7 files changed: +182 -17 lines changed

docs/changelog/103817.yaml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+pr: 103817
+summary: Fix deleting index during snapshot finalization
+area: Snapshot/Restore
+type: bug
+issues:
+ - 101029

server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java

Lines changed: 119 additions & 0 deletions
@@ -17,15 +17,21 @@
 import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
+import org.elasticsearch.action.admin.indices.delete.DeleteIndexClusterStateUpdateRequest;
+import org.elasticsearch.action.support.ActionTestUtils;
 import org.elasticsearch.action.support.GroupedActionListener;
 import org.elasticsearch.action.support.PlainActionFuture;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
 import org.elasticsearch.cluster.SnapshotsInProgress;
+import org.elasticsearch.cluster.metadata.MetadataDeleteIndexService;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.ListenableFuture;
 import org.elasticsearch.common.util.concurrent.UncategorizedExecutionException;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.discovery.AbstractDisruptionTestCase;
+import org.elasticsearch.index.Index;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.repositories.IndexId;
 import org.elasticsearch.repositories.RepositoryData;
@@ -37,6 +43,7 @@
 import org.elasticsearch.test.disruption.NetworkDisruption;
 import org.elasticsearch.test.transport.MockTransportService;
 import org.elasticsearch.transport.RemoteTransportException;
+import org.elasticsearch.transport.TransportService;
 
 import java.io.IOException;
 import java.nio.file.Files;
@@ -45,9 +52,12 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
@@ -2014,6 +2024,115 @@ public void testSnapshotAndCloneQueuedAfterMissingShard() throws Exception {
         assertThat(snapshot2.get().getSnapshotInfo().state(), is(SnapshotState.PARTIAL));
     }
 
+    public void testDeleteIndexWithOutOfOrderFinalization() throws Exception {
+
+        final String indexToDelete = "index-to-delete";
+        final List<String> indexNames = Arrays.asList(indexToDelete, "index-0", "index-1", "index-2");
+
+        for (final String indexName : indexNames) {
+            assertAcked(prepareCreate(indexName, indexSettingsNoReplicas(1)));
+        }
+
+        final String repoName = "test-repo";
+        createRepository(repoName, "fs");
+
+        // block the update-shard-snapshot-status requests so we can execute them in a specific order
+        final MockTransportService masterTransportService = (MockTransportService) internalCluster().getCurrentMasterNodeInstance(
+            TransportService.class
+        );
+        final Map<String, ListenableFuture<Void>> otherIndexSnapshotListeners = indexNames.stream()
+            .collect(Collectors.toMap(k -> k, k -> new ListenableFuture<>()));
+        masterTransportService.<UpdateIndexShardSnapshotStatusRequest>addRequestHandlingBehavior(
+            SnapshotsService.UPDATE_SNAPSHOT_STATUS_ACTION_NAME,
+            (handler, request, channel, task) -> {
+                final String indexName = request.shardId().getIndexName();
+                if (indexName.equals(indexToDelete)) {
+                    handler.messageReceived(request, channel, task);
+                } else {
+                    final ListenableFuture<Void> listener = otherIndexSnapshotListeners.get(indexName);
+                    assertNotNull(indexName, listener);
+                    listener.addListener(
+                        ActionTestUtils.assertNoFailureListener(ignored -> handler.messageReceived(request, channel, task))
+                    );
+                }
+            }
+        );
+
+        // start the snapshots, each targeting index-to-delete and one other index so we can control their finalization order
+        final HashMap<String, Runnable> snapshotCompleters = new HashMap<String, Runnable>();
+        for (final String blockingIndex : Arrays.asList("index-0", "index-1", "index-2")) {
+            final String snapshotName = "snapshot-with-" + blockingIndex;
+            final ActionFuture<CreateSnapshotResponse> snapshotFuture = clusterAdmin().prepareCreateSnapshot(repoName, snapshotName)
+                .setWaitForCompletion(true)
+                .setPartial(true)
+                .setIndices(indexToDelete, blockingIndex)
+                .execute();
+
+            // ensure each snapshot has really started before moving on to the next one
+            awaitClusterState(
+                cs -> cs.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY)
+                    .forRepo(repoName)
+                    .stream()
+                    .anyMatch(e -> e.snapshot().getSnapshotId().getName().equals(snapshotName))
+            );
+
+            snapshotCompleters.put(blockingIndex, () -> {
+                assertFalse(snapshotFuture.isDone());
+                otherIndexSnapshotListeners.get(blockingIndex).onResponse(null);
+                assertEquals(SnapshotState.SUCCESS, snapshotFuture.actionGet(10, TimeUnit.SECONDS).getSnapshotInfo().state());
+            });
+        }
+
+        // set up to delete the index at a very specific moment during finalization
+        final MetadataDeleteIndexService masterDeleteIndexService = internalCluster().getCurrentMasterNodeInstance(
+            MetadataDeleteIndexService.class
+        );
+        final Thread indexRecreationThread = new Thread(() -> {
+            try {
+                // wait until the snapshot has entered finalization
+                awaitClusterState(
+                    cs -> cs.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY)
+                        .forRepo(repoName)
+                        .stream()
+                        .anyMatch(e -> e.snapshot().getSnapshotId().getName().equals("snapshot-with-index-1") && e.state().completed())
+                );
+
+                // execute the index deletion _directly on the master_ so it happens before the snapshot finalization executes
+                assertTrue(
+                    PlainActionFuture.<AcknowledgedResponse, Exception>get(
+                        future -> masterDeleteIndexService.deleteIndices(
+                            new DeleteIndexClusterStateUpdateRequest().indices(
                                new Index[] { internalCluster().clusterService().state().metadata().index(indexToDelete).getIndex() }
+                            ).ackTimeout(TimeValue.timeValueSeconds(10)).masterNodeTimeout(TimeValue.timeValueSeconds(10)),
+                            future
+                        )
+                    ).isAcknowledged()
+                );
+
+                // ultimately create the index again so that taking a full snapshot will pick up any missing shard gen blob, and deleting
+                // that full snapshot will clean up all dangling shard-level blobs
+                createIndex(indexToDelete, indexSettingsNoReplicas(1).build());
+            } catch (Exception e) {
+                throw new AssertionError(e);
+            }
+        });
+        indexRecreationThread.start();
+
+        // release the snapshots to be finalized, in this order
+        for (final String blockingIndex : Arrays.asList("index-1", "index-2", "index-0")) {
+            snapshotCompleters.get(blockingIndex).run();
+        }
+
+        indexRecreationThread.join();
+        masterTransportService.clearAllRules();
+
+        // create a full snapshot to verify that the repo is still ok
+        createFullSnapshot(repoName, "final-full-snapshot");
+
+        // delete the full snapshot to clean up the leftover shard-level metadata (which trips repo consistency assertions otherwise)
+        startDeleteSnapshot(repoName, "final-full-snapshot").actionGet(10, TimeUnit.SECONDS);
+    }
+
     private static void assertSnapshotStatusCountOnRepo(String otherBlockedRepoName, int count) {
         final SnapshotsStatusResponse snapshotsStatusResponse = client().admin()
             .cluster()

server/src/main/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexClusterStateUpdateRequest.java

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
  */
 public class DeleteIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest<DeleteIndexClusterStateUpdateRequest> {
 
-    DeleteIndexClusterStateUpdateRequest() {
+    public DeleteIndexClusterStateUpdateRequest() {
 
     }
 }

server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ public Map<RepositoryShardId, Set<ShardGeneration>> obsoleteShardGenerations() {
     }
 
     public ClusterState updatedClusterState(ClusterState state) {
-        final ClusterState updatedState = SnapshotsService.stateWithoutSnapshot(state, snapshotInfo.snapshot());
+        final ClusterState updatedState = SnapshotsService.stateWithoutSnapshot(state, snapshotInfo.snapshot(), updatedShardGenerations);
         obsoleteGenerations.set(
             updatedState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY)
                 .obsoleteGenerations(snapshotInfo.repository(), state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY))

server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java

Lines changed: 5 additions & 0 deletions
@@ -141,6 +141,11 @@ public ShardGeneration getShardGen(IndexId indexId, int shardId) {
         return generations.get(shardId);
     }
 
+    public boolean hasShardGen(RepositoryShardId repositoryShardId) {
+        final List<ShardGeneration> indexShardGens = getGens(repositoryShardId.index());
+        return repositoryShardId.shardId() < indexShardGens.size() && indexShardGens.get(repositoryShardId.shardId()) != null;
+    }
+
     public List<ShardGeneration> getGens(IndexId indexId) {
         return shardGenerations.getOrDefault(indexId, Collections.emptyList());
     }
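
For orientation, here is a minimal, self-contained sketch of the bounds-and-null check that the new hasShardGen method performs. The class and names below (ShardGenCheckSketch, gens) are hypothetical stand-ins for illustration only, not the real ShardGenerations/RepositoryShardId API:

    import java.util.Arrays;
    import java.util.List;

    // Hypothetical stand-in for the lookup in ShardGenerations#hasShardGen: a shard's
    // generation counts as present only if the shard id falls inside the per-index list
    // and that slot actually holds a generation (null means "not written").
    public final class ShardGenCheckSketch {

        static boolean hasShardGen(List<String> indexShardGens, int shardId) {
            return shardId < indexShardGens.size() && indexShardGens.get(shardId) != null;
        }

        public static void main(String[] args) {
            final List<String> gens = Arrays.asList("gen-uuid-0", null);
            System.out.println(hasShardGen(gens, 0)); // true: a generation exists for shard 0
            System.out.println(hasShardGen(gens, 1)); // false: slot exists but no generation written
            System.out.println(hasShardGen(gens, 2)); // false: shard unknown, e.g. its index was deleted
        }
    }

In the SnapshotsService hunks further down, this check guards the paths that reuse a finished shard's generation for still-pending snapshots, so a shard whose index was deleted mid-finalization (and therefore has no generation) is skipped rather than reused.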

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 1 addition & 0 deletions
@@ -1504,6 +1504,7 @@ private void cleanupOldShardGens(
             (indexId, gens) -> gens.forEach(
                 (shardId, oldGen) -> toDelete.add(
                     shardContainer(indexId, shardId).path().buildAsString().substring(prefixPathLen) + INDEX_FILE_PREFIX + oldGen
+                        .toBlobNamePart()
                 )
             )
         );

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

Lines changed: 49 additions & 15 deletions
@@ -761,7 +761,7 @@ private void startCloning(Repository repository, SnapshotsInProgress.Entry clone
             endingSnapshots.add(targetSnapshot);
             initializingClones.remove(targetSnapshot);
             logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
-            removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null);
+            removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null, ShardGenerations.EMPTY);
         };
 
         // 1. step, load SnapshotInfo to make sure that source snapshot was successful for the indices we want to clone
@@ -1194,7 +1194,8 @@ public void onFailure(String source, Exception e) {
                     snapshot.snapshot(),
                     e,
                     null,
-                    new CleanupAfterErrorListener(userCreateSnapshotListener, e)
+                    new CleanupAfterErrorListener(userCreateSnapshotListener, e),
+                    ShardGenerations.EMPTY
                 );
             }
 
@@ -1238,7 +1239,8 @@ public void onFailure(Exception e) {
                     snapshot.snapshot(),
                     e,
                     null,
-                    new CleanupAfterErrorListener(userCreateSnapshotListener, e)
+                    new CleanupAfterErrorListener(userCreateSnapshotListener, e),
+                    ShardGenerations.EMPTY
                 );
             }
         });
@@ -1876,14 +1878,21 @@ private void endSnapshot(SnapshotsInProgress.Entry entry, Metadata metadata, @Nu
                 entry.snapshot(),
                 new SnapshotException(snapshot, "Aborted on initialization"),
                 repositoryData,
-                null
+                null,
+                ShardGenerations.EMPTY
             );
             return;
         }
         if (entry.isClone() && entry.state() == State.FAILED) {
            logger.debug("Removing failed snapshot clone [{}] from cluster state", entry);
            if (newFinalization) {
-                removeFailedSnapshotFromClusterState(snapshot, new SnapshotException(snapshot, entry.failure()), null, null);
+                removeFailedSnapshotFromClusterState(
+                    snapshot,
+                    new SnapshotException(snapshot, entry.failure()),
+                    null,
+                    null,
+                    ShardGenerations.EMPTY
+                );
            }
            return;
        }
@@ -2055,13 +2064,30 @@ private void finalizeSnapshotEntry(Snapshot snapshot, Metadata metadata, Reposit
                     completeListenersIgnoringException(endAndGetListenersToResolve(writtenSnapshotInfo.snapshot()), result);
                     logger.info("snapshot [{}] completed with state [{}]", snapshot, writtenSnapshotInfo.state());
                     runNextQueuedOperation(result.v1(), repository, true);
-                }, e -> handleFinalizationFailure(e, snapshot, repositoryData))
+                },
+                    e -> handleFinalizationFailure(
+                        e,
+                        snapshot,
+                        repositoryData,
+                        // we might have written the new root blob before failing here, so we must use the updated shardGenerations
+                        shardGenerations
+                    )
+                )
                 )
             );
-        }, e -> handleFinalizationFailure(e, snapshot, repositoryData));
+        },
+            e -> handleFinalizationFailure(
+                e,
+                snapshot,
+                repositoryData,
+                // a failure here means the root blob was not updated, but the updated shard generation blobs are all in place so we can
+                // use the updated shardGenerations for all pending shard snapshots
+                shardGenerations
+            )
+        );
         } catch (Exception e) {
             assert false : new AssertionError(e);
-            handleFinalizationFailure(e, snapshot, repositoryData);
+            handleFinalizationFailure(e, snapshot, repositoryData, ShardGenerations.EMPTY);
         }
     }
 
@@ -2113,7 +2139,12 @@ private List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>> endAndGetListe
     * @param snapshot snapshot that failed to finalize
     * @param repositoryData current repository data for the snapshot's repository
     */
-    private void handleFinalizationFailure(Exception e, Snapshot snapshot, RepositoryData repositoryData) {
+    private void handleFinalizationFailure(
+        Exception e,
+        Snapshot snapshot,
+        RepositoryData repositoryData,
+        ShardGenerations shardGenerations
+    ) {
        if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) {
            // Failure due to not being master any more, don't try to remove snapshot from cluster state the next master
            // will try ending this snapshot again
@@ -2125,7 +2156,7 @@ private void handleFinalizationFailure(Exception e, Snapshot snapshot, Repositor
            failAllListenersOnMasterFailOver(e);
        } else {
            logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
-            removeFailedSnapshotFromClusterState(snapshot, e, repositoryData, null);
+            removeFailedSnapshotFromClusterState(snapshot, e, repositoryData, null, shardGenerations);
        }
    }
 
@@ -2251,7 +2282,7 @@ private static Tuple<ClusterState, List<SnapshotDeletionsInProgress.Entry>> read
     * @param snapshot snapshot for which to remove the snapshot operation
     * @return updated cluster state
     */
-    public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot snapshot) {
+    public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot snapshot, ShardGenerations shardGenerations) {
        final SnapshotsInProgress snapshots = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        ClusterState result = state;
        int indexOfEntry = -1;
@@ -2312,7 +2343,8 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna
                        final ShardSnapshotStatus shardState = finishedShardEntry.value;
                        final RepositoryShardId repositoryShardId = finishedShardEntry.key;
                        if (shardState.state() != ShardState.SUCCESS
-                            || previousEntry.shardsByRepoShardId().containsKey(repositoryShardId) == false) {
+                            || previousEntry.shardsByRepoShardId().containsKey(repositoryShardId) == false
+                            || shardGenerations.hasShardGen(finishedShardEntry.key) == false) {
                            continue;
                        }
                        updatedShardAssignments = maybeAddUpdatedAssignment(
@@ -2329,7 +2361,8 @@ public static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sna
                        .shardsByRepoShardId()) {
                        final ShardSnapshotStatus shardState = finishedShardEntry.value;
                        if (shardState.state() == ShardState.SUCCESS
-                            && previousEntry.shardsByRepoShardId().containsKey(finishedShardEntry.key)) {
+                            && previousEntry.shardsByRepoShardId().containsKey(finishedShardEntry.key)
+                            && shardGenerations.hasShardGen(finishedShardEntry.key)) {
                            updatedShardAssignments = maybeAddUpdatedAssignment(
                                updatedShardAssignments,
                                shardState,
@@ -2417,14 +2450,15 @@ private void removeFailedSnapshotFromClusterState(
        Snapshot snapshot,
        Exception failure,
        @Nullable RepositoryData repositoryData,
-        @Nullable CleanupAfterErrorListener listener
+        @Nullable CleanupAfterErrorListener listener,
+        ShardGenerations shardGenerations
    ) {
        assert failure != null : "Failure must be supplied";
        clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() {
 
            @Override
            public ClusterState execute(ClusterState currentState) {
-                final ClusterState updatedState = stateWithoutSnapshot(currentState, snapshot);
+                final ClusterState updatedState = stateWithoutSnapshot(currentState, snapshot, shardGenerations);
                assert updatedState == currentState || endingSnapshots.contains(snapshot)
                    : "did not track [" + snapshot + "] in ending snapshots while removing it from the cluster state";
                // now check if there are any delete operations that refer to the just failed snapshot and remove the snapshot from them
