
Commit 0ee9d28 (parent 97b910f)

CSI Volume Group Snapshots ONTAP-SAN-Economy

This commit extends the CSI Volume Group Snapshot feature to the ONTAP-SAN-Economy driver.

25 files changed: +1805 −332 lines

core/orchestrator_core.go

Lines changed: 91 additions & 51 deletions
```diff
@@ -1134,7 +1134,9 @@ func (o *TridentOrchestrator) validateAndCreateBackendFromConfig(
 		}
 	}
 
-	sb, err := factory.NewStorageBackendForConfig(ctx, configInJSON, configRef, backendUUID, commonConfig, backendSecret)
+	sb, err := factory.NewStorageBackendForConfig(
+		ctx, configInJSON, configRef, backendUUID, commonConfig, backendSecret,
+	)
 
 	if commonConfig.UserState != "" {
 		// If the userState field is present in tbc/backend.json, then update the userBackendState.
@@ -1502,7 +1504,9 @@ func (o *TridentOrchestrator) UpdateBackendState(
 	return backend.ConstructExternal(ctx), o.storeClient.UpdateBackend(ctx, backend)
 }
 
-func (o *TridentOrchestrator) updateUserBackendState(ctx context.Context, sb *storage.Backend, userBackendState string, isCLI bool) (err error) {
+func (o *TridentOrchestrator) updateUserBackendState(
+	ctx context.Context, sb *storage.Backend, userBackendState string, isCLI bool,
+) (err error) {
 	backend := *sb
 	Logc(ctx).WithFields(LogFields{
 		"backendName": backend.Name(),
@@ -1538,7 +1542,8 @@ func (o *TridentOrchestrator) updateUserBackendState(ctx context.Context, sb *st
 
 	// An extra check to ensure that the user-backend state is valid.
 	if err = newUserBackendState.Validate(); err != nil {
-		return fmt.Errorf("invalid user backend state provided: %s, allowed are: `%s`, `%s`", string(newUserBackendState), storage.UserNormal, storage.UserSuspended)
+		return fmt.Errorf("invalid user backend state provided: %s, allowed are: `%s`, `%s`",
+			string(newUserBackendState), storage.UserNormal, storage.UserSuspended)
 	}
 
 	// Idempotent check.
```
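Many of the hunks in this commit are pure line-length fixes like the one above. Go does not implicitly concatenate adjacent string literals across lines, so long messages are split with the `+` operator, which the compiler folds into a single constant at compile time. A minimal sketch of the pattern (the state values below are illustrative, not Trident's):

```go
package main

import "fmt"

func main() {
	// The "+" across lines is folded into one constant at compile time,
	// so splitting a long format string costs nothing at runtime.
	err := fmt.Errorf("invalid user backend state provided: %s, "+
		"allowed are: `%s`, `%s`", "paused", "normal", "suspended")
	fmt.Println(err)
}
```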
```diff
@@ -1550,8 +1555,8 @@ func (o *TridentOrchestrator) updateUserBackendState(ctx context.Context, sb *st
 	if newUserBackendState.IsSuspended() {
 		// Backend is only suspended when its current state is either online, offline or failed.
 		if !backend.State().IsOnline() && !backend.State().IsOffline() && !backend.State().IsFailed() {
-			return fmt.Errorf("the backend '%s' is currently not in any of the expected states: offline, online, or failed. Its current state is '%s'", backend.Name(),
-				backend.State())
+			return fmt.Errorf("the backend '%s' is currently not in any of the expected states: "+
+				"offline, online, or failed. Its current state is '%s'", backend.Name(), backend.State())
 		}
 	}
 
@@ -1561,7 +1566,9 @@ func (o *TridentOrchestrator) updateUserBackendState(ctx context.Context, sb *st
 	return nil
 }
 
-func (o *TridentOrchestrator) updateBackendState(ctx context.Context, sb *storage.Backend, backendState string) (err error) {
+func (o *TridentOrchestrator) updateBackendState(
+	ctx context.Context, sb *storage.Backend, backendState string,
+) (err error) {
 	backend := *sb
 	Logc(ctx).WithFields(LogFields{
 		"backendName": backend.Name(),
@@ -2195,7 +2202,9 @@ func (o *TridentOrchestrator) UpdateVolume(
 }
 
 // UpdateVolumeLUKSPassphraseNames updates the LUKS passphrase names stored on a volume in the cache and persistent store.
-func (o *TridentOrchestrator) UpdateVolumeLUKSPassphraseNames(ctx context.Context, volume string, passphraseNames *[]string) error {
+func (o *TridentOrchestrator) UpdateVolumeLUKSPassphraseNames(
+	ctx context.Context, volume string, passphraseNames *[]string,
+) error {
 	ctx = GenerateRequestContextForLayer(ctx, LogLayerCore)
 
 	if o.bootstrapError != nil {
@@ -2285,8 +2294,8 @@ func (o *TridentOrchestrator) cloneVolumeInitial(
 
 	// Check if the storage class of source and clone volume is different, only if the orchestrator is not in Docker plugin mode. In Docker plugin mode, the storage class of source and clone volume will be different.
 	if !isDockerPluginMode() && volumeConfig.StorageClass != sourceVolume.Config.StorageClass {
-		return nil, errors.MismatchedStorageClassError("clone volume %s from source volume %s with different storage classes is not allowed", volumeConfig.Name,
-			volumeConfig.CloneSourceVolume)
+		return nil, errors.MismatchedStorageClassError("clone volume %s from source volume %s with"+
+			" different storage classes is not allowed", volumeConfig.Name, volumeConfig.CloneSourceVolume)
 	}
 
 	Logc(ctx).WithFields(LogFields{
@@ -2526,8 +2535,8 @@ func (o *TridentOrchestrator) cloneVolumeRetry(
 
 	// Check if the storage class of source and clone volume is different, only if the orchestrator is not in Docker plugin mode. In Docker plugin mode, the storage class of source and clone volume will be different at times.
 	if !isDockerPluginMode() && cloneConfig.StorageClass != sourceVolume.Config.StorageClass {
-		return nil, errors.MismatchedStorageClassError("clone volume %s from source volume %s with different storage classes is not allowed",
-			cloneConfig.Name, cloneConfig.CloneSourceVolume)
+		return nil, errors.MismatchedStorageClassError("clone volume %s from source volume %s with "+
+			"different storage classes is not allowed", cloneConfig.Name, cloneConfig.CloneSourceVolume)
 	}
 
 	backend, found = o.backends[txn.VolumeCreatingConfig.BackendUUID]
@@ -4100,7 +4109,7 @@ func (o *TridentOrchestrator) CreateSnapshot(
 
 // CreateGroupSnapshot creates a snapshot of a volume group
 func (o *TridentOrchestrator) CreateGroupSnapshot(
-	ctx context.Context, groupSnapshotConfig *storage.GroupSnapshotConfig,
+	ctx context.Context, config *storage.GroupSnapshotConfig,
 ) (externalGroupSnapshot *storage.GroupSnapshotExternal, err error) {
 	ctx = GenerateRequestContextForLayer(ctx, LogLayerCore)
 
@@ -4123,22 +4132,22 @@ func (o *TridentOrchestrator) CreateGroupSnapshot(
 	defer o.updateMetrics()
 
 	// pvc-UUID names of all pvcs in a volume group
-	sourceVolumeIDs := groupSnapshotConfig.GetVolumeNames()
+	sourceVolumeIDs := config.GetVolumeNames()
 	if len(sourceVolumeIDs) == 0 {
 		return nil, errors.InvalidInputError("group snapshot must have at least one source volume")
 	}
 
 	// Get the backends from the volumes and determine if they support group snapshots
 	// Map [backends] -> volumes
-	backendsToVolumes = make(map[string][]*storage.VolumeConfig, 0)
+	backendsToVolumes = make(map[string][]*storage.VolumeConfig)
 	// Get the volume and do simple validation
 	for _, volumeID := range sourceVolumeIDs {
 
 		// Check if the snapshot already exists, the snapshot will exist in our cache
 		// snapshots are stored by their ID pvc-UUID/snapshot-GroupSnapshotUUID
 		// For group snapshots, the snapshot name will be the same for every volume in the group but we'll be able to
 		// differentiate based on the pvc if needed
-		snapName, err := storage.ConvertGroupSnapshotID(groupSnapshotConfig.ID())
+		snapName, err := storage.ConvertGroupSnapshotID(config.ID())
 		if err != nil {
 			return nil, err
 		}
```
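Besides the `config` rename, the hunk above drops the size hint in `make(map[string][]*storage.VolumeConfig, 0)`; a hint of 0 is a no-op for maps, so the bare `make` is the idiomatic form. A self-contained sketch of the backend-to-volumes grouping this loop builds (the types and field names here are stand-ins, not Trident's):

```go
package main

import "fmt"

// VolumeConfig is a stand-in for Trident's volume config type.
type VolumeConfig struct {
	Name        string
	BackendUUID string
}

// groupByBackend buckets volumes by the backend that owns them, mirroring
// the backendsToVolumes map built above.
func groupByBackend(vols []*VolumeConfig) map[string][]*VolumeConfig {
	byBackend := make(map[string][]*VolumeConfig) // no size hint needed
	for _, v := range vols {
		byBackend[v.BackendUUID] = append(byBackend[v.BackendUUID], v)
	}
	return byBackend
}

func main() {
	vols := []*VolumeConfig{
		{Name: "pvc-a", BackendUUID: "backend-1"},
		{Name: "pvc-b", BackendUUID: "backend-1"},
		{Name: "pvc-c", BackendUUID: "backend-2"},
	}
	for uuid, group := range groupByBackend(vols) {
		fmt.Printf("%s: %d volume(s)\n", uuid, len(group))
	}
}
```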
```diff
@@ -4217,7 +4226,7 @@ func (o *TridentOrchestrator) CreateGroupSnapshot(
 	// Add a vol transaction with enough info about the whole group
 	// Add transaction in case the operation must be rolled back later
 	txn := &storage.VolumeTransaction{
-		GroupSnapshotConfig: groupSnapshotConfig,
+		GroupSnapshotConfig: config,
 		Op:                  storage.AddGroupSnapshot,
 	}
 	if err = o.AddVolumeTransaction(ctx, txn); err != nil {
```
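The transaction added here is a write-ahead record: it is persisted before the group snapshot is attempted, so if Trident crashes mid-operation, recovery code finds the record and can roll back. A minimal sketch of that pattern; `Store`, `Txn`, and `createWithTxn` are hypothetical stand-ins, not Trident's actual transaction API:

```go
package txnsketch

import "fmt"

// Txn is a hypothetical persisted record of an in-flight operation.
type Txn struct{ Op string }

// Store is a hypothetical persistent store for transaction records.
type Store interface {
	AddTxn(Txn) error
	DeleteTxn(Txn) error
}

// createWithTxn persists intent before acting. On success the record is
// deleted; on failure it is kept so restart-time recovery can roll back.
func createWithTxn(store Store, op string, do func() error) (err error) {
	txn := Txn{Op: op}
	if err = store.AddTxn(txn); err != nil {
		return err
	}
	defer func() {
		if err == nil {
			// Success: the write-ahead record is no longer needed.
			err = store.DeleteTxn(txn)
		}
	}()
	if err = do(); err != nil {
		return fmt.Errorf("operation %q failed: %w", op, err)
	}
	return nil
}
```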
```diff
@@ -4226,18 +4235,50 @@ func (o *TridentOrchestrator) CreateGroupSnapshot(
 
 	// Recovery function in case of error
 	defer func() {
-		err = o.addGroupSnapshotCleanup(ctx, err, backendsToVolumes, groupSnapshot, snapshots, txn, groupSnapshotConfig)
+		err = o.addGroupSnapshotCleanup(ctx, err, backendsToVolumes, groupSnapshot, snapshots, txn, config)
 	}()
 
-	// Create the group snapshot
-	// TODO (TRID-16891): Do post-processing within the driver function; san-economy needs work
-	groupSnapshot, snapshots, err = groupSnapshotter.CreateGroupSnapshot(ctx, groupSnapshotConfig, groupSnapshotTarget)
+	// Create the group snapshot in the backend.
+	err = groupSnapshotter.CreateGroupSnapshot(ctx, config, groupSnapshotTarget)
 	if err != nil {
 		if errors.IsMaxLimitReachedError(err) {
 			return nil, errors.MaxLimitReachedError(fmt.Sprintf("failed to create group snapshot %s: %v",
-				groupSnapshotConfig.ID(), err))
+				config.ID(), err))
+		}
+		return nil, fmt.Errorf("failed to create group snapshot %s: %v", config.ID(), err)
+	}
+
+	// Process the group snapshot by backend and the set volumes relative to that backend.
+	// Defer handling errors until after all backends have processed their snapshots because
+	// the orchestrator needs to know which snapshots are present for each backend.
+	var processingErrs error
+	for backendUUID, volumes := range backendsToVolumes {
+		b, ok := o.backends[backendUUID]
+		if !ok {
+			return nil, errors.NotFoundError("backend not found")
 		}
-		return nil, fmt.Errorf("failed to create group snapshot %s: %v", groupSnapshotConfig.ID(), err)
+
+		// Each backend should handle backend-specific processing with its own slice of volumes.
+		snapshotsSlice, err := b.ProcessGroupSnapshot(ctx, config, volumes)
+		if err != nil {
+			Logc(ctx).WithFields(LogFields{
+				"volumes":     volumes,
+				"backendUUID": backendUUID,
+			}).WithError(err).Debugf("Failed to process grouped snapshots for backend %s", b.Name())
+			processingErrs = errors.Join(processingErrs, err)
+		}
+
+		// Build up the set of grouped snapshots across all volumes in each backend.
+		snapshots = append(snapshots, snapshotsSlice...)
+	}
+	if processingErrs != nil {
+		return nil, errors.Join(err, processingErrs)
+	}
+
+	// Construct the group snapshot from the config and grouped snapshots across all backends.
+	groupSnapshot, err = groupSnapshotter.ConstructGroupSnapshot(ctx, config, snapshots)
+	if err != nil {
+		return nil, fmt.Errorf("failed to construct group snapshot %s: %v", config.ID(), err)
 	}
 
 	// Save references to new group snapshot and each snapshot
```
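This hunk is the heart of the refactor: the snapshotter's `CreateGroupSnapshot` no longer returns the snapshot objects itself (the removed TODO noted san-economy needed driver-side post-processing). The flow is now three phases: take the snapshot on storage, let each backend post-process its own volume set via `ProcessGroupSnapshot`, then construct the group object from the combined results. A compilable sketch of that control flow, using simplified stand-in interfaces rather than Trident's real ones:

```go
package groupsnapflow

import "errors"

// Simplified stand-ins for Trident's types; not the real interfaces.
type Config struct{ ID string }
type Snapshot struct{ VolumeName string }
type GroupSnapshot struct {
	Config    Config
	Snapshots []Snapshot
}

type Snapshotter interface {
	CreateGroupSnapshot(cfg Config) error // phase 1: act on storage
	ConstructGroupSnapshot(cfg Config, snaps []Snapshot) (*GroupSnapshot, error)
}

type Backend interface {
	// Phase 2: backend-specific post-processing for its own volumes.
	ProcessGroupSnapshot(cfg Config, vols []string) ([]Snapshot, error)
}

func createGroupSnapshot(
	s Snapshotter, backends map[string]Backend,
	volsByBackend map[string][]string, cfg Config,
) (*GroupSnapshot, error) {
	// Phase 1: take the group snapshot on the storage system.
	if err := s.CreateGroupSnapshot(cfg); err != nil {
		return nil, err
	}

	// Phase 2: collect results from every backend before failing, so we
	// know which snapshots exist even when one backend errors out.
	var snaps []Snapshot
	var procErrs error
	for uuid, vols := range volsByBackend {
		got, err := backends[uuid].ProcessGroupSnapshot(cfg, vols)
		procErrs = errors.Join(procErrs, err)
		snaps = append(snaps, got...)
	}
	if procErrs != nil {
		return nil, procErrs
	}

	// Phase 3: assemble the group object from all per-volume snapshots.
	return s.ConstructGroupSnapshot(cfg, snaps)
}
```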
```diff
@@ -4273,43 +4314,41 @@ func (o *TridentOrchestrator) addGroupSnapshotCleanup(
 	//    In this case, we don't need to roll anything back.
 	// 2. We failed to save the snapshots or group snapshot to the persistent store.
 	//    In this case, we need to remove the snapshots from the backend.
-	if backendsToVolumes != nil && snapshots != nil {
-		// We succeeded in adding the snapshots to the backends; now delete them.
-		// We need to go through each backend to delete the snapshot
-		for backendUUID, volConfigs := range backendsToVolumes {
-			backend, ok := o.backends[backendUUID]
-			if !ok {
-				cleanupErr = errors.NotFoundError("backend not found")
-			}
-			for _, volConfig := range volConfigs {
-				var volName string
-				for _, snap := range snapshots {
-					volName, _, err = storage.ParseSnapshotID(snap.ID())
-					if err != nil {
-						cleanupErr = err
-					}
-					if volName == volConfig.Name {
-						cleanupErr = backend.DeleteSnapshot(ctx, snap.Config, volConfig)
-						if cleanupErr != nil {
-							cleanupErr = fmt.Errorf("unable to delete group snapshot from backend during cleanup: %v",
-								cleanupErr)
-						}
-					}
+
+	// We may have succeeded in adding some snapshots to the backends.
+	// Because something failed, we must delete them.
+	for backendUUID, volConfigs := range backendsToVolumes {
+		backend, ok := o.backends[backendUUID]
+		if !ok {
+			cleanupErr = errors.Join(cleanupErr, errors.NotFoundError("backend not found"))
+		}
+
+		for _, volConfig := range volConfigs {
+			var volName string
+			for _, snap := range snapshots {
+				volName, _, err = storage.ParseSnapshotID(snap.ID())
+				if err != nil {
+					cleanupErr = errors.Join(cleanupErr, err)
+				} else if volName != volConfig.Name {
+					continue
+				}
+
+				if deleteErr := backend.DeleteSnapshot(ctx, snap.Config, volConfig); deleteErr != nil {
+					cleanupErr = fmt.Errorf("unable to delete snapshot from backend during cleanup: %w", deleteErr)
 				}
 			}
 		}
-		if cleanupErr != nil {
-			cleanupErr = fmt.Errorf("unable to delete group snapshot from backend during cleanup: %v", cleanupErr)
-		}
 	}
 	}
+
 	if cleanupErr == nil {
 		// Only clean up the snapshot transaction if we've succeeded at cleaning up on the backend or if we didn't
 		// need to do so in the first place.
-		if txErr = o.DeleteVolumeTransaction(ctx, volTxn); txErr != nil {
-			txErr = fmt.Errorf("unable to clean up group snapshot transaction: %v", txErr)
+		if deleteErr := o.DeleteVolumeTransaction(ctx, volTxn); deleteErr != nil {
+			txErr = fmt.Errorf("unable to clean up group snapshot transaction: %w", deleteErr)
 		}
 	}
+
 	if cleanupErr != nil || txErr != nil {
 		// Remove the snapshot from memory, if it's there, so that the user can try to re-add.
 		// This will trigger recovery code.
```
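Two error-handling changes run through the cleanup hunk above: failures now accumulate via `errors.Join` (Go 1.20+) instead of each new failure overwriting `cleanupErr`, and wrapping switched from `%v` to `%w`, which keeps the error chain inspectable. A small demonstration of why that matters:

```go
package main

import (
	"errors"
	"fmt"
)

var errNotFound = errors.New("backend not found")

func main() {
	// Accumulate rather than overwrite: both failures survive.
	var cleanupErr error
	cleanupErr = errors.Join(cleanupErr, errNotFound)
	cleanupErr = errors.Join(cleanupErr, errors.New("could not parse snapshot ID"))

	// %w preserves the chain, so sentinel checks still match after wrapping.
	wrapped := fmt.Errorf("unable to delete snapshot from backend during cleanup: %w", cleanupErr)
	fmt.Println(errors.Is(wrapped, errNotFound)) // true
	fmt.Println(wrapped)
}
```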
```diff
@@ -4328,11 +4367,12 @@ func (o *TridentOrchestrator) addGroupSnapshotCleanup(
 			"Repeat creating the group snapshot or restart %v.",
 			err, config.OrchestratorName)
 	}
+
 	return err
 }
 
-func (o *TridentOrchestrator) GetGroupSnapshot(ctx context.Context,
-	groupSnapshotID string,
+func (o *TridentOrchestrator) GetGroupSnapshot(
+	ctx context.Context, groupSnapshotID string,
 ) (groupSnapshotExternal *storage.GroupSnapshotExternal, err error) {
 	ctx = GenerateRequestContextForLayer(ctx, LogLayerCore)
 
```

frontend/csi/group_controller_server.go

Lines changed: 1 addition & 1 deletion
```diff
@@ -87,7 +87,7 @@ func (p *Plugin) CreateVolumeGroupSnapshot(
 		// This operation must be idempotent. Attempt to clean up any artifacts from the failed create.
 		Logc(ctx).WithFields(fields).WithError(err).Error("Failed to create group snapshot; cleaning up failed snapshots.")
 		if deleteErr := p.orchestrator.DeleteGroupSnapshot(ctx, groupName); deleteErr != nil {
-			Logc(ctx).WithFields(fields).WithError(deleteErr).Error("Failed to clean up failed snapshots.")
+			Logc(ctx).WithFields(fields).WithError(deleteErr).Debug("Could not delete failed group snapshot.")
 			err = errors.Join(err, deleteErr)
 		}
 
```
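CSI requires `CreateVolumeGroupSnapshot` to be idempotent, so on failure the plugin deletes whatever partial snapshots were created before the caller retries; the delete error is joined into the returned error so neither failure is lost, and the log level drops to Debug because the joined error already carries the detail. A sketch of the pattern; the `orchestrator` interface here is a hypothetical stand-in for Trident's:

```go
package csisketch

import "errors"

// orchestrator is a hypothetical stand-in for Trident's orchestrator API.
type orchestrator interface {
	CreateGroupSnapshot(name string) error
	DeleteGroupSnapshot(name string) error
}

// createGroupSnapshot cleans up partial artifacts on failure so a retry
// starts from a clean slate, and reports both errors if cleanup also fails.
func createGroupSnapshot(o orchestrator, name string) error {
	err := o.CreateGroupSnapshot(name)
	if err == nil {
		return nil
	}
	if deleteErr := o.DeleteGroupSnapshot(name); deleteErr != nil {
		err = errors.Join(err, deleteErr)
	}
	return err
}
```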
go.mod

Lines changed: 1 addition & 1 deletion
```diff
@@ -56,6 +56,7 @@ require (
 	golang.org/x/crypto v0.40.0 // github.com/golang/crypto
 	golang.org/x/net v0.42.0 // github.com/golang/net
 	golang.org/x/oauth2 v0.30.0 // github.com/golang/oauth2
+	golang.org/x/sync v0.16.0 // indirect
 	golang.org/x/sys v0.34.0 // github.com/golang/sys
 	golang.org/x/text v0.27.0 // github.com/golang/text
 	golang.org/x/time v0.11.0 // github.com/golang/time
@@ -168,7 +169,6 @@
 	go.opentelemetry.io/otel/trace v1.35.0 // indirect
 	golang.org/x/exp v0.0.0-20250718183923-645b1fa84792 // indirect
 	golang.org/x/mod v0.26.0 // indirect
-	golang.org/x/sync v0.16.0 // indirect
 	golang.org/x/term v0.33.0 // indirect
 	golang.org/x/tools v0.35.0 // indirect
 	google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect
```

mocks/mock_storage/mock_storage.go

Lines changed: 33 additions & 5 deletions
Generated file; diff not rendered.

mocks/mock_storage_drivers/mock_ontap/mock_api.go

Lines changed: 14 additions & 0 deletions
Generated file; diff not rendered.
