@@ -596,7 +596,13 @@ public void cloneShardSnapshot(
596
596
existingSnapshots = tuple .v1 ();
597
597
} else {
598
598
newGen = ShardGeneration .newGeneration ();
599
- existingSnapshots = buildBlobStoreIndexShardSnapshots (index , Collections .emptySet (), shardContainer , shardGeneration ).v1 ();
599
+ existingSnapshots = buildBlobStoreIndexShardSnapshots (
600
+ index ,
601
+ shardNum ,
602
+ Collections .emptySet (),
603
+ shardContainer ,
604
+ shardGeneration
605
+ ).v1 ();
600
606
existingShardGen = shardGeneration ;
601
607
}
602
608
SnapshotFiles existingTargetFiles = null ;
@@ -1309,6 +1315,7 @@ protected void doRun() throws Exception {
1309
1315
newGen = -1L ;
1310
1316
blobStoreIndexShardSnapshots = buildBlobStoreIndexShardSnapshots (
1311
1317
indexId ,
1318
+ shardId ,
1312
1319
originalShardBlobs ,
1313
1320
shardContainer ,
1314
1321
originalRepositoryData .shardGenerations ().getShardGen (indexId , shardId )
@@ -3203,6 +3210,7 @@ private void doSnapshotShard(SnapshotShardContext context) {
3203
3210
snapshotStatus .ensureNotAborted ();
3204
3211
Tuple <BlobStoreIndexShardSnapshots , ShardGeneration > tuple = buildBlobStoreIndexShardSnapshots (
3205
3212
context .indexId (),
3213
+ shardId .id (),
3206
3214
blobs ,
3207
3215
shardContainer ,
3208
3216
generation
@@ -3848,14 +3856,15 @@ public BlobStoreIndexShardSnapshots getBlobStoreIndexShardSnapshots(IndexId inde
3848
3856
blobs = shardContainer .listBlobsByPrefix (OperationPurpose .SNAPSHOT_METADATA , SNAPSHOT_INDEX_PREFIX ).keySet ();
3849
3857
}
3850
3858
3851
- return buildBlobStoreIndexShardSnapshots (indexId , blobs , shardContainer , shardGen ).v1 ();
3859
+ return buildBlobStoreIndexShardSnapshots (indexId , shardId , blobs , shardContainer , shardGen ).v1 ();
3852
3860
}
3853
3861
3854
3862
/**
3855
3863
* Loads all available snapshots in the repository using the given {@code generation} or falling back to trying to determine it from
3856
3864
* the given list of blobs in the shard container.
3857
3865
*
3858
3866
* @param indexId {@link IndexId} identifying the corresponding index
3867
+ * @param shardId The 0-based shard id, see also {@link ShardId#id()}
3859
3868
* @param blobs list of blobs in repository
3860
3869
* @param generation shard generation or {@code null} in case there was no shard generation tracked in the {@link RepositoryData} for
3861
3870
* this shard because its snapshot was created in a version older than
@@ -3864,6 +3873,7 @@ public BlobStoreIndexShardSnapshots getBlobStoreIndexShardSnapshots(IndexId inde
3864
3873
*/
3865
3874
private Tuple <BlobStoreIndexShardSnapshots , ShardGeneration > buildBlobStoreIndexShardSnapshots (
3866
3875
IndexId indexId ,
3876
+ int shardId ,
3867
3877
Set <String > blobs ,
3868
3878
BlobContainer shardContainer ,
3869
3879
@ Nullable ShardGeneration generation
@@ -3883,6 +3893,21 @@ private Tuple<BlobStoreIndexShardSnapshots, ShardGeneration> buildBlobStoreIndex
3883
3893
generation
3884
3894
);
3885
3895
} catch (NoSuchFileException noSuchFileException ) {
3896
+ // The master may have concurrently mutated the shard generation. This can happen when the master fails over,
3897
+ // which is "expected". We do not need to apply the following workaround for a missing file in this case.
3898
+ final RepositoryData currentRepositoryData ;
3899
+ try {
3900
+ final long latestGeneration = latestIndexBlobId ();
3901
+ currentRepositoryData = getRepositoryData (latestGeneration );
3902
+ } catch (Exception e ) {
3903
+ noSuchFileException .addSuppressed (e );
3904
+ throw noSuchFileException ;
3905
+ }
3906
+ final ShardGeneration latestShardGen = currentRepositoryData .shardGenerations ().getShardGen (indexId , shardId );
3907
+ if (latestShardGen == null || latestShardGen .equals (generation ) == false ) {
3908
+ throw noSuchFileException ;
3909
+ }
3910
+
3886
3911
// This shouldn't happen (absent an external force deleting blobs from the repo) but in practice we've found bugs in the way
3887
3912
// we manipulate shard generation UUIDs under concurrent snapshot load which can lead to incorrectly deleting a referenced
3888
3913
// shard-level `index-UUID` blob during finalization. We definitely want to treat this as a test failure (see the `assert`
0 commit comments