7
7
*/
8
8
package org .elasticsearch .snapshots ;
9
9
10
+ import org .apache .logging .log4j .Level ;
10
11
import org .elasticsearch .action .ActionRequestBuilder ;
11
12
import org .elasticsearch .action .admin .cluster .snapshots .create .CreateSnapshotResponse ;
12
13
import org .elasticsearch .action .admin .cluster .snapshots .restore .RestoreSnapshotResponse ;
13
14
import org .elasticsearch .action .admin .cluster .snapshots .status .SnapshotsStatusResponse ;
14
15
import org .elasticsearch .action .index .IndexRequestBuilder ;
15
16
import org .elasticsearch .client .internal .Client ;
16
17
import org .elasticsearch .cluster .ClusterState ;
18
+ import org .elasticsearch .cluster .SnapshotsInProgress ;
17
19
import org .elasticsearch .cluster .metadata .Metadata ;
18
20
import org .elasticsearch .cluster .metadata .RepositoriesMetadata ;
21
+ import org .elasticsearch .cluster .service .ClusterService ;
19
22
import org .elasticsearch .common .bytes .BytesReference ;
20
23
import org .elasticsearch .common .settings .Settings ;
21
24
import org .elasticsearch .common .unit .ByteSizeUnit ;
22
25
import org .elasticsearch .common .util .concurrent .EsExecutors ;
23
26
import org .elasticsearch .core .IOUtils ;
24
27
import org .elasticsearch .core .Strings ;
25
28
import org .elasticsearch .index .IndexVersion ;
29
+ import org .elasticsearch .index .snapshots .blobstore .BlobStoreIndexShardSnapshotsIntegritySuppressor ;
26
30
import org .elasticsearch .repositories .IndexId ;
27
31
import org .elasticsearch .repositories .IndexMetaDataGenerations ;
28
32
import org .elasticsearch .repositories .Repository ;
32
36
import org .elasticsearch .repositories .ShardGenerations ;
33
37
import org .elasticsearch .repositories .blobstore .BlobStoreRepository ;
34
38
import org .elasticsearch .repositories .fs .FsRepository ;
39
+ import org .elasticsearch .test .ClusterServiceUtils ;
40
+ import org .elasticsearch .test .MockLog ;
35
41
import org .elasticsearch .xcontent .XContentFactory ;
36
42
37
43
import java .nio .channels .SeekableByteChannel ;
52
58
import static org .elasticsearch .test .hamcrest .ElasticsearchAssertions .assertAcked ;
53
59
import static org .elasticsearch .test .hamcrest .ElasticsearchAssertions .assertFileExists ;
54
60
import static org .hamcrest .Matchers .containsString ;
61
+ import static org .hamcrest .Matchers .empty ;
55
62
import static org .hamcrest .Matchers .equalTo ;
56
63
import static org .hamcrest .Matchers .greaterThan ;
57
64
import static org .hamcrest .Matchers .hasSize ;
@@ -767,6 +774,13 @@ public void testSnapshotWithMissingShardLevelIndexFile() throws Exception {
767
774
.setWaitForCompletion (true )
768
775
.setIndices ("test-idx-*" )
769
776
.get ();
777
+ final boolean repairWithDelete = randomBoolean ();
778
+ if (repairWithDelete || randomBoolean ()) {
779
+ clusterAdmin ().prepareCreateSnapshot (TEST_REQUEST_TIMEOUT , "test-repo" , "snap-for-deletion" )
780
+ .setWaitForCompletion (true )
781
+ .setIndices ("test-idx-1" )
782
+ .get ();
783
+ }
770
784
771
785
logger .info ("--> deleting shard level index file" );
772
786
final Path indicesPath = repo .resolve ("indices" );
@@ -780,28 +794,111 @@ public void testSnapshotWithMissingShardLevelIndexFile() throws Exception {
780
794
Files .delete (shardGen );
781
795
}
782
796
783
- logger .info ("--> creating another snapshot" );
797
+ if (randomBoolean ()) {
798
+ logger .info ("""
799
+ --> restoring the snapshot, the repository should not have lost any shard data despite deleting index-*, \
800
+ because it uses snap-*.dat files and not the index-* to determine what files to restore""" );
801
+ indicesAdmin ().prepareDelete ("test-idx-1" , "test-idx-2" ).get ();
802
+ RestoreSnapshotResponse restoreSnapshotResponse = clusterAdmin ().prepareRestoreSnapshot (
803
+ TEST_REQUEST_TIMEOUT ,
804
+ "test-repo" ,
805
+ "test-snap-1"
806
+ ).setWaitForCompletion (true ).get ();
807
+ assertEquals (0 , restoreSnapshotResponse .getRestoreInfo ().failedShards ());
808
+ ensureGreen ("test-idx-1" , "test-idx-2" );
809
+ }
810
+
811
+ logger .info ("--> creating another snapshot, which should re-create the missing file" );
812
+ try (
813
+ var ignored = new BlobStoreIndexShardSnapshotsIntegritySuppressor ();
814
+ var mockLog = MockLog .capture (BlobStoreRepository .class )
815
+ ) {
816
+ mockLog .addExpectation (
817
+ new MockLog .SeenEventExpectation (
818
+ "fallback message" ,
819
+ "org.elasticsearch.repositories.blobstore.BlobStoreRepository" ,
820
+ Level .ERROR ,
821
+ "index [test-idx-1/*] shard generation [*] in [test-repo][*] not found - falling back to reading all shard snapshots"
822
+ )
823
+ );
824
+ mockLog .addExpectation (
825
+ new MockLog .SeenEventExpectation (
826
+ "shard blobs list" ,
827
+ "org.elasticsearch.repositories.blobstore.BlobStoreRepository" ,
828
+ Level .ERROR ,
829
+ "read shard snapshots [*] due to missing shard generation [*] for index [test-idx-1/*] in [test-repo][*]"
830
+ )
831
+ );
832
+ if (repairWithDelete ) {
833
+ clusterAdmin ().prepareDeleteSnapshot (TEST_REQUEST_TIMEOUT , "test-repo" , "snap-for-deletion" ).get ();
834
+ } else if (randomBoolean ()) {
835
+ CreateSnapshotResponse createSnapshotResponse = clusterAdmin ().prepareCreateSnapshot (
836
+ TEST_REQUEST_TIMEOUT ,
837
+ "test-repo" ,
838
+ "test-snap-2"
839
+ ).setWaitForCompletion (true ).setIndices ("test-idx-1" ).get ();
840
+ assertEquals (
841
+ createSnapshotResponse .getSnapshotInfo ().totalShards (),
842
+ createSnapshotResponse .getSnapshotInfo ().successfulShards ()
843
+ );
844
+ } else {
845
+ clusterAdmin ().prepareCloneSnapshot (TEST_REQUEST_TIMEOUT , "test-repo" , "test-snap-1" , "test-snap-2" )
846
+ .setIndices ("test-idx-1" )
847
+ .get ();
848
+ safeAwait (
849
+ ClusterServiceUtils .addTemporaryStateListener (
850
+ internalCluster ().getInstance (ClusterService .class ),
851
+ cs -> SnapshotsInProgress .get (cs ).isEmpty ()
852
+ )
853
+ );
854
+ assertThat (
855
+ clusterAdmin ().prepareGetSnapshots (TEST_REQUEST_TIMEOUT , "test-repo" )
856
+ .setSnapshots ("test-snap-2" )
857
+ .get ()
858
+ .getSnapshots ()
859
+ .get (0 )
860
+ .shardFailures (),
861
+ empty ()
862
+ );
863
+ }
864
+ mockLog .assertAllExpectationsMatched ();
865
+
866
+ try (
867
+ Stream <Path > shardFiles = Files .list (
868
+ indicesPath .resolve (getRepositoryData ("test-repo" ).resolveIndexId ("test-idx-1" ).getId ()).resolve ("0" )
869
+ )
870
+ ) {
871
+ assertTrue (shardFiles .anyMatch (file -> file .getFileName ().toString ().startsWith (BlobStoreRepository .INDEX_FILE_PREFIX )));
872
+ }
873
+ }
874
+
875
+ if (randomBoolean ()) {
876
+ indicesAdmin ().prepareDelete ("test-idx-1" ).get ();
877
+ RestoreSnapshotResponse restoreSnapshotResponse2 = clusterAdmin ().prepareRestoreSnapshot (
878
+ TEST_REQUEST_TIMEOUT ,
879
+ "test-repo" ,
880
+ repairWithDelete ? "test-snap-1" : randomFrom ("test-snap-1" , "test-snap-2" )
881
+ ).setIndices ("test-idx-1" ).setWaitForCompletion (true ).get ();
882
+ assertEquals (0 , restoreSnapshotResponse2 .getRestoreInfo ().failedShards ());
883
+ ensureGreen ("test-idx-1" , "test-idx-2" );
884
+ }
885
+
886
+ logger .info ("--> creating another snapshot, which should succeed since the shard gen file now exists again" );
784
887
CreateSnapshotResponse createSnapshotResponse = clusterAdmin ().prepareCreateSnapshot (
785
888
TEST_REQUEST_TIMEOUT ,
786
889
"test-repo" ,
787
- "test-snap-2 "
890
+ "test-snap-3 "
788
891
).setWaitForCompletion (true ).setIndices ("test-idx-1" ).get ();
789
- assertEquals (
790
- createSnapshotResponse .getSnapshotInfo ().successfulShards (),
791
- createSnapshotResponse .getSnapshotInfo ().totalShards () - 1
792
- );
892
+ assertEquals (createSnapshotResponse .getSnapshotInfo ().totalShards (), createSnapshotResponse .getSnapshotInfo ().successfulShards ());
793
893
794
- logger .info (
795
- "--> restoring the first snapshot, the repository should not have lost any shard data despite deleting index-N, "
796
- + "because it uses snap-*.data files and not the index-N to determine what files to restore"
797
- );
798
- indicesAdmin ().prepareDelete ("test-idx-1" , "test-idx-2" ).get ();
799
- RestoreSnapshotResponse restoreSnapshotResponse = clusterAdmin ().prepareRestoreSnapshot (
894
+ indicesAdmin ().prepareDelete ("test-idx-1" ).get ();
895
+ RestoreSnapshotResponse restoreSnapshotResponse3 = clusterAdmin ().prepareRestoreSnapshot (
800
896
TEST_REQUEST_TIMEOUT ,
801
897
"test-repo" ,
802
- "test-snap-1"
803
- ).setWaitForCompletion (true ).get ();
804
- assertEquals (0 , restoreSnapshotResponse .getRestoreInfo ().failedShards ());
898
+ repairWithDelete ? randomFrom ("test-snap-1" , "test-snap-3" ) : randomFrom ("test-snap-1" , "test-snap-2" , "test-snap-3" )
899
+ ).setIndices ("test-idx-1" ).setWaitForCompletion (true ).get ();
900
+ assertEquals (0 , restoreSnapshotResponse3 .getRestoreInfo ().failedShards ());
901
+ ensureGreen ("test-idx-1" , "test-idx-2" );
805
902
}
806
903
807
904
public void testDeletesWithUnexpectedIndexBlob () throws Exception {
0 commit comments