Skip to content

Commit cd3a14e

Browse files
author
anton.voskresensky
committed
fix delete snapshot
1 parent e241ad4 commit cd3a14e

File tree

1 file changed

+56
-2
lines changed

1 file changed

+56
-2
lines changed

pkg/utils/snapshots.go

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func CheckAndCleanSnapshot(snapshotName string, indexName string, snapshots []op
5252
}
5353
if snapshot.State == "PARTIAL" || snapshot.State == "FAILED" {
5454
logger.Info(fmt.Sprintf("Deleting PARTIAL/FAILED snapshot snapshot=%s state=%s", snapshotName, snapshot.State))
55-
err := client.DeleteSnapshots(snapRepo, []string{snapshotName})
55+
err := DeleteSnapshotsWithRetry(client, snapRepo, []string{snapshotName}, logger)
5656
if err != nil {
5757
logger.Error(fmt.Sprintf("Failed to delete PARTIAL/FAILED snapshot snapshot=%s error=%v", snapshotName, err))
5858
return false, err
@@ -352,7 +352,7 @@ retryLoop:
352352
duration := time.Since(startTime)
353353
durationStr := formatDuration(duration)
354354
logger.Warn(fmt.Sprintf("Snapshot is PARTIAL/FAILED, deleting and retrying snapshot=%s state=%s duration=%s attempt=%d", snapshotName, snapshot.State, durationStr, attempt))
355-
err := client.DeleteSnapshots(snapRepo, []string{snapshotName})
355+
err := DeleteSnapshotsWithRetry(client, snapRepo, []string{snapshotName}, logger)
356356
if err != nil {
357357
logger.Error(fmt.Sprintf("Failed to delete PARTIAL/FAILED snapshot snapshot=%s error=%v", snapshotName, err))
358358
} else {
@@ -479,6 +479,60 @@ func BatchDeleteSnapshots(client *opensearch.Client, snapshots []string, snapRep
479479
return successful, failed, nil
480480
}
481481

482+
func DeleteSnapshotsWithRetry(client *opensearch.Client, snapRepo string, snapshotNames []string, logger *logging.Logger) error {
483+
const maxRetries = 15
484+
485+
if len(snapshotNames) == 0 {
486+
return nil
487+
}
488+
489+
var lastErr error
490+
for attempt := 1; attempt <= maxRetries; attempt++ {
491+
existingSnapshots := make([]string, 0)
492+
for _, snapshotName := range snapshotNames {
493+
snapshots, err := GetSnapshotsIgnore404(client, snapRepo, snapshotName)
494+
if err != nil {
495+
logger.Warn(fmt.Sprintf("Failed to check snapshot existence snapshot=%s error=%v, will try to delete", snapshotName, err))
496+
existingSnapshots = append(existingSnapshots, snapshotName)
497+
continue
498+
}
499+
if len(snapshots) > 0 {
500+
existingSnapshots = append(existingSnapshots, snapshotName)
501+
} else {
502+
logger.Info(fmt.Sprintf("Snapshot already deleted, skipping snapshot=%s", snapshotName))
503+
}
504+
}
505+
506+
if len(existingSnapshots) == 0 {
507+
logger.Info(fmt.Sprintf("All snapshots already deleted attempt=%d snapshots=%v", attempt, snapshotNames))
508+
return nil
509+
}
510+
511+
logger.Info(fmt.Sprintf("Deleting snapshots attempt=%d maxRetries=%d snapshots=%v", attempt, maxRetries, existingSnapshots))
512+
513+
err := client.DeleteSnapshots(snapRepo, existingSnapshots)
514+
if err != nil {
515+
lastErr = err
516+
logger.Error(fmt.Sprintf("Failed to delete snapshots attempt=%d snapshots=%v error=%v", attempt, existingSnapshots, err))
517+
if attempt < maxRetries {
518+
logger.Info(fmt.Sprintf("Waiting 1 minute before retry attempt=%d", attempt+1))
519+
time.Sleep(1 * time.Minute)
520+
continue
521+
}
522+
} else {
523+
logger.Info(fmt.Sprintf("Snapshots deleted successfully attempt=%d snapshots=%v", attempt, existingSnapshots))
524+
return nil
525+
}
526+
}
527+
528+
if lastErr != nil {
529+
logger.Error(fmt.Sprintf("Failed to delete snapshots after all retries maxRetries=%d snapshots=%v error=%v", maxRetries, snapshotNames, lastErr))
530+
return lastErr
531+
}
532+
533+
return nil
534+
}
535+
482536
type SnapshotGroup struct {
483537
SnapshotName string
484538
Indices []string

0 commit comments

Comments
 (0)