Skip to content

Commit 8a33f16

Browse files
authored
Cleanup snapshot on clone failure
1 parent fd79a2d commit 8a33f16

File tree

3 files changed

+159
-2
lines changed

3 files changed

+159
-2
lines changed

frontend/csi/node_server_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12789,6 +12789,7 @@ func TestNodeUnstageFCPVolume(t *testing.T) {
1278912789
mockNodeHelper := mockNodeHelpers.NewMockNodeHelper(gomock.NewController(t))
1279012790
mockNodeHelper.EXPECT().DeleteTrackingInfo(gomock.Any(),
1279112791
gomock.Any()).Return(nil).AnyTimes()
12792+
mockNodeHelper.EXPECT().ReadTrackingInfo(gomock.Any(), gomock.Any()).Return(&models.VolumeTrackingInfo{}, nil).AnyTimes()
1279212793
return mockNodeHelper
1279312794
},
1279412795
setupFCPMock: func() fcp.FCP {

storage_drivers/ontap/ontap_common.go

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/google/uuid"
3030

3131
tridentconfig "github.com/netapp/trident/config"
32+
"github.com/netapp/trident/internal/fiji"
3233
. "github.com/netapp/trident/logging"
3334
"github.com/netapp/trident/pkg/capacity"
3435
"github.com/netapp/trident/pkg/collection"
@@ -150,6 +151,9 @@ var (
150151
smbShareDeleteACL = map[string]string{DefaultSMBAccessControlUser: DefaultSMBAccessControlUserType}
151152
lunMutex = locks.NewGCNamedMutex()
152153
igroupMutex = locks.NewGCNamedMutex()
154+
155+
duringVolCloneAfterSnapCreation1 = fiji.Register("duringVolCloneAfterSnapCreation1", "ontap_common")
156+
duringVolCloneAfterSnapCreation2 = fiji.Register("duringVolCloneAfterSnapCreation2", "ontap_common")
153157
)
154158

155159
// CleanBackendName removes brackets and replaces colons with periods to avoid regex parsing errors.
@@ -4271,15 +4275,60 @@ func ConstructGroupSnapshot(
42714275
return storage.NewGroupSnapshot(config, snapshotIDs, dateCreated), nil
42724276
}
42734277

4278+
func cleanupFailedCloneFlexVol(ctx context.Context, client api.OntapAPI, err error, clonedVolName, sourceVol,
4279+
createdSnapName string,
4280+
) {
4281+
// Return if err is nil or if the error is a volume exists error.
4282+
if err == nil || drivers.IsVolumeExistsError(err) {
4283+
return
4284+
}
4285+
4286+
Logc(ctx).WithFields(LogFields{
4287+
"clonedVolume": clonedVolName,
4288+
"sourceVol": sourceVol,
4289+
"snapshot": createdSnapName,
4290+
}).Debug("Cleaning up after failed flexvol clone.")
4291+
4292+
if clonedVolName != "" {
4293+
Logc(ctx).WithFields(LogFields{
4294+
"volume": clonedVolName,
4295+
}).Debug("Deleting volume after failed flexvol clone.")
4296+
4297+
if destroyErr := client.VolumeDestroy(ctx, clonedVolName, true, true); destroyErr != nil {
4298+
Logc(ctx).WithError(destroyErr).Warn("Unable to delete volume after failed volume clone.")
4299+
}
4300+
}
4301+
4302+
if createdSnapName != "" {
4303+
Logc(ctx).WithFields(LogFields{
4304+
"snapshot": createdSnapName,
4305+
}).Debug("Deleting snapshot after failed flexvol clone.")
4306+
4307+
if snapDeleteErr := client.VolumeSnapshotDelete(ctx, createdSnapName, sourceVol); snapDeleteErr != nil {
4308+
Logc(ctx).WithError(snapDeleteErr).Warn("Unable to delete snapshot after failed volume clone.")
4309+
}
4310+
}
4311+
}
4312+
42744313
// cloneFlexvol creates a volume clone
42754314
func cloneFlexvol(
42764315
ctx context.Context, cloneVolConfig *storage.VolumeConfig, labels string, split bool,
42774316
config *drivers.OntapStorageDriverConfig, client api.OntapAPI, qosPolicyGroup api.QosPolicyGroup,
42784317
) error {
4318+
// Vars used in failed clone cleanup
4319+
var err error
4320+
var clonedVolName string // Holds the cloned volume name
4321+
var createdSnapName string // Holds the snapshot name of a Trident created snapshot
4322+
42794323
name := cloneVolConfig.InternalName
42804324
source := cloneVolConfig.CloneSourceVolumeInternal
42814325
snapshot := cloneVolConfig.CloneSourceSnapshotInternal
42824326

4327+
// Cleanup cloned volume and snapshots we created if we error
4328+
defer func() {
4329+
cleanupFailedCloneFlexVol(ctx, client, err, clonedVolName, source, createdSnapName)
4330+
}()
4331+
42834332
fields := LogFields{
42844333
"Method": "cloneFlexvol",
42854334
"Type": "ontap_common",
@@ -4308,13 +4357,19 @@ func cloneFlexvol(
43084357
if err = client.VolumeSnapshotCreate(ctx, snapshot, source); err != nil {
43094358
return err
43104359
}
4360+
createdSnapName = snapshot
43114361
cloneVolConfig.CloneSourceSnapshotInternal = snapshot
43124362
}
43134363

4364+
if err = duringVolCloneAfterSnapCreation1.Inject(); err != nil {
4365+
return err
4366+
}
4367+
43144368
// Create the clone based on a snapshot
43154369
if err = client.VolumeCloneCreate(ctx, name, source, snapshot, false); err != nil {
43164370
return err
43174371
}
4372+
clonedVolName = name
43184373

43194374
desiredStates, abortStates := []string{"online"}, []string{"error"}
43204375
volState, err := client.VolumeWaitForStates(ctx, name, desiredStates, abortStates, maxFlexvolCloneWait)
@@ -4335,14 +4390,18 @@ func cloneFlexvol(
43354390

43364391
// Set the QoS Policy if necessary
43374392
if qosPolicyGroup.Kind != api.InvalidQosPolicyGroupKind {
4338-
if err := client.VolumeSetQosPolicyGroupName(ctx, name, qosPolicyGroup); err != nil {
4393+
if err = client.VolumeSetQosPolicyGroupName(ctx, name, qosPolicyGroup); err != nil {
43394394
return err
43404395
}
43414396
}
43424397

4398+
if err = duringVolCloneAfterSnapCreation2.Inject(); err != nil {
4399+
return err
4400+
}
4401+
43434402
// Split the clone if requested
43444403
if split {
4345-
if err := client.VolumeCloneSplitStart(ctx, name); err != nil {
4404+
if err = client.VolumeCloneSplitStart(ctx, name); err != nil {
43464405
return fmt.Errorf("error splitting clone: %v", err)
43474406
}
43484407
}

storage_drivers/ontap/ontap_common_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6898,6 +6898,23 @@ func TestCloneFlexvol(t *testing.T) {
68986898
split: false,
68996899
expectError: true,
69006900
},
6901+
"Snapshot cleanup on vol create error": {
6902+
configureOntapAPI: func(mockAPI *mockapi.MockOntapAPI) {
6903+
mockAPI.EXPECT().VolumeExists(ctx, internalName).Return(false, nil)
6904+
mockAPI.EXPECT().VolumeSnapshotCreate(ctx, gomock.Any(), "fakeSource").Return(nil)
6905+
mockAPI.EXPECT().VolumeCloneCreate(
6906+
ctx, internalName, cloneSourceVolumeInternal, gomock.Any(), false,
6907+
).Return(fmt.Errorf("error creating clone"))
6908+
mockAPI.EXPECT().VolumeSnapshotDelete(ctx, gomock.Any(), "fakeSource").Return(nil)
6909+
},
6910+
cloneVolumeConfig: storage.VolumeConfig{
6911+
InternalName: internalName,
6912+
CloneSourceVolumeInternal: cloneSourceVolumeInternal,
6913+
},
6914+
storageDriverConfig: storageDriverConfig,
6915+
split: false,
6916+
expectError: true,
6917+
},
69016918
"No specific snapshot was requested": {
69026919
configureOntapAPI: func(mockAPI *mockapi.MockOntapAPI) {
69036920
mockAPI.EXPECT().VolumeExists(ctx, internalName).Return(false, nil)
@@ -6933,6 +6950,7 @@ func TestCloneFlexvol(t *testing.T) {
69336950
).Return(nil)
69346951
mockAPI.EXPECT().VolumeWaitForStates(ctx, internalName, gomock.Any(), gomock.Any(),
69356952
maxFlexvolCloneWait).Return("", errors.New("error waiting for NVMe clone"))
6953+
mockAPI.EXPECT().VolumeDestroy(ctx, "dummy", true, true)
69366954
},
69376955
cloneVolumeConfig: cloneVolumeConfig,
69386956
storageDriverConfig: storageDriverConfigNVMe,
@@ -6948,6 +6966,7 @@ func TestCloneFlexvol(t *testing.T) {
69486966
mockAPI.EXPECT().VolumeSetComment(ctx, internalName, internalName, label).Return(errors.New("error creating clone"))
69496967
mockAPI.EXPECT().VolumeWaitForStates(ctx, internalName, gomock.Any(), gomock.Any(),
69506968
maxFlexvolCloneWait).Return("online", nil)
6969+
mockAPI.EXPECT().VolumeDestroy(ctx, "dummy", true, true)
69516970
},
69526971
cloneVolumeConfig: cloneVolumeConfig,
69536972
storageDriverConfig: storageDriverConfig,
@@ -6964,6 +6983,7 @@ func TestCloneFlexvol(t *testing.T) {
69646983
maxFlexvolCloneWait).Return("online", nil)
69656984
mockAPI.EXPECT().VolumeSetComment(ctx, internalName, internalName, label).Return(nil)
69666985
mockAPI.EXPECT().VolumeMount(ctx, internalName, "/"+internalName).Return(errors.New("error mounting volume"))
6986+
mockAPI.EXPECT().VolumeDestroy(ctx, "dummy", true, true)
69676987
},
69686988
cloneVolumeConfig: cloneVolumeConfig,
69696989
storageDriverConfig: storageDriverConfig,
@@ -6983,6 +7003,7 @@ func TestCloneFlexvol(t *testing.T) {
69837003
mockAPI.EXPECT().VolumeSetQosPolicyGroupName(
69847004
ctx, internalName, qosPolicyGroup,
69857005
).Return(errors.New("error setting qos policy"))
7006+
mockAPI.EXPECT().VolumeDestroy(ctx, "dummy", true, true)
69867007
},
69877008
cloneVolumeConfig: cloneVolumeConfig,
69887009
storageDriverConfig: storageDriverConfig,
@@ -7001,6 +7022,7 @@ func TestCloneFlexvol(t *testing.T) {
70017022
mockAPI.EXPECT().VolumeMount(ctx, internalName, "/"+internalName).Return(nil)
70027023
mockAPI.EXPECT().VolumeSetQosPolicyGroupName(ctx, internalName, qosPolicyGroup).Return(nil)
70037024
mockAPI.EXPECT().VolumeCloneSplitStart(ctx, internalName).Return(errors.New("error splitting clone"))
7025+
mockAPI.EXPECT().VolumeDestroy(ctx, "dummy", true, true)
70047026
},
70057027
cloneVolumeConfig: cloneVolumeConfig,
70067028
storageDriverConfig: storageDriverConfig,
@@ -10201,3 +10223,78 @@ func TestConstructGroupSnapshot(t *testing.T) {
1020110223
})
1020210224
}
1020310225
}
10226+
10227+
func TestCleanupFailedCloneFlexVol(t *testing.T) {
10228+
tests := map[string]struct {
10229+
err error
10230+
expectErr bool
10231+
clonedVolName string
10232+
sourceVol string
10233+
snapshot string
10234+
setupMock func(mockAPI *mockapi.MockOntapAPI)
10235+
}{
10236+
"Clean up vol and snap": {
10237+
err: errors.New("error"),
10238+
expectErr: false,
10239+
clonedVolName: "testClone",
10240+
sourceVol: "testSourceVol",
10241+
snapshot: "testSnap",
10242+
setupMock: func(mockAPI *mockapi.MockOntapAPI) {
10243+
mockAPI.EXPECT().VolumeDestroy(ctx, "testClone", true, true)
10244+
mockAPI.EXPECT().VolumeSnapshotDelete(ctx, "testSnap", "testSourceVol")
10245+
},
10246+
},
10247+
"Clean up snapshot only": {
10248+
err: errors.New("error"),
10249+
expectErr: false,
10250+
clonedVolName: "",
10251+
sourceVol: "testSourceVol",
10252+
snapshot: "testSnap",
10253+
setupMock: func(mockAPI *mockapi.MockOntapAPI) {
10254+
mockAPI.EXPECT().VolumeSnapshotDelete(ctx, "testSnap", "testSourceVol")
10255+
},
10256+
},
10257+
"Clean up cloned vol only": {
10258+
err: errors.New("error"),
10259+
expectErr: false,
10260+
clonedVolName: "testClone",
10261+
sourceVol: "testSourceVol",
10262+
snapshot: "",
10263+
setupMock: func(mockAPI *mockapi.MockOntapAPI) {
10264+
mockAPI.EXPECT().VolumeDestroy(ctx, "testClone", true, true)
10265+
},
10266+
},
10267+
"Skip cleanup no error": {
10268+
err: nil,
10269+
expectErr: false,
10270+
clonedVolName: "clonedVol",
10271+
sourceVol: "testVol",
10272+
snapshot: "testSnap",
10273+
setupMock: func(mockAPI *mockapi.MockOntapAPI) {
10274+
},
10275+
},
10276+
"Snapshot and vol destroy error": {
10277+
err: errors.New("error"),
10278+
expectErr: false,
10279+
clonedVolName: "clonedVol",
10280+
sourceVol: "testVol",
10281+
snapshot: "testSnap",
10282+
setupMock: func(mockAPI *mockapi.MockOntapAPI) {
10283+
mockAPI.EXPECT().VolumeDestroy(ctx, "clonedVol", true, true).Return(fmt.Errorf("mock error"))
10284+
mockAPI.EXPECT().VolumeSnapshotDelete(ctx, "testSnap", "testVol").Return(fmt.Errorf("mock error"))
10285+
},
10286+
},
10287+
}
10288+
10289+
for name, test := range tests {
10290+
t.Run(name, func(t *testing.T) {
10291+
ctx := context.Background()
10292+
mockCtrl := gomock.NewController(t)
10293+
defer mockCtrl.Finish()
10294+
mockAPI := mockapi.NewMockOntapAPI(mockCtrl)
10295+
test.setupMock(mockAPI)
10296+
10297+
cleanupFailedCloneFlexVol(ctx, mockAPI, test.err, test.clonedVolName, test.sourceVol, test.snapshot)
10298+
})
10299+
}
10300+
}

0 commit comments

Comments
 (0)