Skip to content

Commit 8a29bf5

Browse files
committed
Re-queue SnapshotContents that are readyToUse: false
SnapshotContents with readyToUse: false should be periodically requeued with exp. backoff until the CSI driver confirms the snapshot is ready.
1 parent f9e125b commit 8a29bf5

File tree

4 files changed

+174
-45
lines changed

4 files changed

+174
-45
lines changed

pkg/sidecar-controller/content_create_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,98 @@ func TestSyncContent(t *testing.T) {
198198
errors: noerrors,
199199
test: testSyncContent,
200200
},
201+
{
202+
name: "1-7: Just created un-ready snapshot should be requeued",
203+
// A new snapshot should be created
204+
initialContents: withContentStatus(newContentArray("content1-7", "snapuid1-7", "snap1-7", "sid1-7", defaultClass, "", "volume-handle-1-7", retainPolicy, nil, &defaultSize, true),
205+
nil),
206+
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-7", "snapuid1-7", "snap1-7", "sid1-7", defaultClass, "", "volume-handle-1-7", retainPolicy, nil, &defaultSize, true),
207+
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-7"), RestoreSize: &defaultSize, ReadyToUse: &False}),
208+
map[string]string{}),
209+
expectedEvents: noevents,
210+
expectedCreateCalls: []createCall{
211+
{
212+
volumeHandle: "volume-handle-1-7",
213+
snapshotName: "snapshot-snapuid1-7",
214+
driverName: mockDriverName,
215+
snapshotId: "snapuid1-7",
216+
parameters: map[string]string{
217+
utils.PrefixedVolumeSnapshotNameKey: "snap1-7",
218+
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
219+
utils.PrefixedVolumeSnapshotContentNameKey: "content1-7",
220+
},
221+
creationTime: timeNow,
222+
readyToUse: false,
223+
size: defaultSize,
224+
},
225+
},
226+
errors: noerrors,
227+
expectRequeue: true,
228+
expectSuccess: true,
229+
test: testSyncContent,
230+
},
231+
{
232+
name: "1-8: Un-ready snapshot that remains un-ready should be requeued",
233+
// An un-ready snapshot already exists, it will be refreshed
234+
initialContents: withContentAnnotations(withContentStatus(newContentArray("content1-8", "snapuid1-8", "snap1-8", "sid1-8", defaultClass, "", "volume-handle-1-8", retainPolicy, nil, &defaultSize, true),
235+
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-8"), RestoreSize: &defaultSize, ReadyToUse: &False}),
236+
map[string]string{}),
237+
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-8", "snapuid1-8", "snap1-8", "sid1-8", defaultClass, "", "volume-handle-1-8", retainPolicy, nil, &defaultSize, true),
238+
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-8"), RestoreSize: &defaultSize, ReadyToUse: &False}),
239+
map[string]string{}),
240+
expectedEvents: noevents,
241+
expectedCreateCalls: []createCall{
242+
{
243+
volumeHandle: "volume-handle-1-8",
244+
snapshotName: "snapshot-snapuid1-8",
245+
driverName: mockDriverName,
246+
snapshotId: "snapuid1-8",
247+
parameters: map[string]string{
248+
utils.PrefixedVolumeSnapshotNameKey: "snap1-8",
249+
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
250+
utils.PrefixedVolumeSnapshotContentNameKey: "content1-8",
251+
},
252+
creationTime: timeNow,
253+
readyToUse: false,
254+
size: defaultSize,
255+
},
256+
},
257+
errors: noerrors,
258+
expectRequeue: true,
259+
expectSuccess: true,
260+
test: testSyncContent,
261+
},
262+
{
263+
name: "1-9: Un-ready snapshot that becomes ready should not be requeued",
264+
// An un-ready snapshot already exists, it will be refreshed
265+
initialContents: withContentAnnotations(withContentStatus(newContentArray("content1-9", "snapuid1-9", "snap1-9", "sid1-9", defaultClass, "", "volume-handle-1-9", retainPolicy, nil, &defaultSize, true),
266+
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-9"), RestoreSize: &defaultSize, ReadyToUse: &False}),
267+
map[string]string{}),
268+
expectedContents: withContentAnnotations(withContentStatus(newContentArray("content1-9", "snapuid1-9", "snap1-9", "sid1-9", defaultClass, "", "volume-handle-1-9", retainPolicy, nil, &defaultSize, true),
269+
&crdv1.VolumeSnapshotContentStatus{SnapshotHandle: toStringPointer("snapuid1-9"), RestoreSize: &defaultSize, ReadyToUse: &True}),
270+
map[string]string{}),
271+
expectedEvents: noevents,
272+
expectedCreateCalls: []createCall{
273+
{
274+
volumeHandle: "volume-handle-1-9",
275+
snapshotName: "snapshot-snapuid1-9",
276+
driverName: mockDriverName,
277+
snapshotId: "snapuid1-9",
278+
parameters: map[string]string{
279+
utils.PrefixedVolumeSnapshotNameKey: "snap1-9",
280+
utils.PrefixedVolumeSnapshotNamespaceKey: "default",
281+
utils.PrefixedVolumeSnapshotContentNameKey: "content1-9",
282+
},
283+
creationTime: timeNow,
284+
readyToUse: true,
285+
size: defaultSize,
286+
},
287+
},
288+
errors: noerrors,
289+
expectRequeue: false,
290+
expectSuccess: true,
291+
test: testSyncContent,
292+
},
201293
}
202294

203295
runSyncContentTests(t, tests, snapshotClasses)

pkg/sidecar-controller/framework_test.go

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,10 @@ type controllerTest struct {
9797
// Function to call as the test.
9898
test testCall
9999
expectSuccess bool
100+
expectRequeue bool
100101
}
101102

102-
type testCall func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error
103+
type testCall func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (requeue bool, err error)
103104

104105
const (
105106
testNamespace = "default"
@@ -690,16 +691,16 @@ func withContentAnnotations(content []*crdv1.VolumeSnapshotContent, annotations
690691
return content
691692
}
692693

693-
func testSyncContent(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
694+
func testSyncContent(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
694695
return ctrl.syncContent(test.initialContents[0])
695696
}
696697

697-
func testSyncContentError(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
698-
err := ctrl.syncContent(test.initialContents[0])
698+
func testSyncContentError(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
699+
requeue, err := ctrl.syncContent(test.initialContents[0])
699700
if err != nil {
700-
return nil
701+
return requeue, nil
701702
}
702-
return fmt.Errorf("syncSnapshotContent succeeded when failure was expected")
703+
return requeue, fmt.Errorf("syncSnapshotContent succeeded when failure was expected")
703704
}
704705

705706
var (
@@ -725,18 +726,19 @@ var (
725726
// controller waits for the operation lock. Controller is then resumed and we
726727
// check how it behaves.
727728
func wrapTestWithInjectedOperation(toWrap testCall, injectBeforeOperation func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor)) testCall {
728-
return func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) error {
729+
return func(ctrl *csiSnapshotSideCarController, reactor *snapshotReactor, test controllerTest) (bool, error) {
729730
// Inject a hook before async operation starts
730731
klog.V(4).Infof("reactor:injecting call")
731732
injectBeforeOperation(ctrl, reactor)
732733

733734
// Run the tested function (typically syncContent) in a
734735
// separate goroutine.
735736
var testError error
737+
var requeue bool
736738
var testFinished int32
737739

738740
go func() {
739-
testError = toWrap(ctrl, reactor, test)
741+
requeue, testError = toWrap(ctrl, reactor, test)
740742
// Let the "main" test function know that syncContent has finished.
741743
atomic.StoreInt32(&testFinished, 1)
742744
}()
@@ -746,7 +748,7 @@ func wrapTestWithInjectedOperation(toWrap testCall, injectBeforeOperation func(c
746748
time.Sleep(time.Millisecond * 10)
747749
}
748750

749-
return testError
751+
return requeue, testError
750752
}
751753
}
752754

@@ -804,13 +806,20 @@ func runSyncContentTests(t *testing.T, tests []controllerTest, snapshotClasses [
804806
ctrl.classLister = storagelisters.NewVolumeSnapshotClassLister(indexer)
805807

806808
// Run the tested functions
807-
err = test.test(ctrl, reactor, test)
809+
requeue, err := test.test(ctrl, reactor, test)
808810
if test.expectSuccess && err != nil {
809811
t.Errorf("Test %q failed: %v", test.name, err)
810812
}
811813
if !test.expectSuccess && err == nil {
812814
t.Errorf("Test %q failed: expected error, got nil", test.name)
813815
}
816+
if !test.expectSuccess && err == nil {
817+
t.Errorf("Test %q failed: expected error, got nil", test.name)
818+
}
819+
// requeue has meaning only when err == nil. A snapshot content is automatically requeued on error
820+
if err == nil && requeue != test.expectRequeue {
821+
t.Errorf("Test %q expected requeue %t, got %t", test.name, test.expectRequeue, requeue)
822+
}
814823

815824
// Wait for the target state
816825
err = reactor.waitTest(test)

pkg/sidecar-controller/snapshot_controller.go

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,9 @@ import (
5151

5252
const controllerUpdateFailMsg = "snapshot controller failed to update"
5353

54-
// syncContent deals with one key off the queue. It returns false when it's time to quit.
55-
func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnapshotContent) error {
54+
// syncContent deals with one key off the queue. It returns flag indicating if the
55+
// content should be requeued. On error, the content is always requeued.
56+
func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
5657
klog.V(5).Infof("synchronizing VolumeSnapshotContent[%s]", content.Name)
5758

5859
if ctrl.shouldDelete(content) {
@@ -63,13 +64,22 @@ func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnaps
6364
// underlying storage system. Note that the deletion snapshot operation will
6465
// update content SnapshotHandle to nil upon a successful deletion. At this
6566
// point, the finalizer on content should NOT be removed to avoid leaking.
66-
return ctrl.deleteCSISnapshot(content)
67+
err := ctrl.deleteCSISnapshot(content)
68+
if err != nil {
69+
return true, err
70+
}
71+
return false, nil
6772
}
6873
// otherwise, either the snapshot has been deleted from the underlying
6974
// storage system, or the deletion policy is Retain, remove the finalizer
7075
// if there is one so that API server could delete the object if there is
7176
// no other finalizer.
72-
return ctrl.removeContentFinalizer(content)
77+
err := ctrl.removeContentFinalizer(content)
78+
if err != nil {
79+
return true, err
80+
}
81+
return false, nil
82+
7383
}
7484
if content.Spec.Source.VolumeHandle != nil && content.Status == nil {
7585
klog.V(5).Infof("syncContent: Call CreateSnapshot for content %s", content.Name)
@@ -79,11 +89,13 @@ func (ctrl *csiSnapshotSideCarController) syncContent(content *crdv1.VolumeSnaps
7989
// already true. We don't want to keep calling CreateSnapshot
8090
// or ListSnapshots CSI methods over and over again for
8191
// performance reasons.
82-
var err error
83-
if content.Status != nil && content.Status.ReadyToUse != nil && *content.Status.ReadyToUse == true {
92+
if contentIsReady(content) {
8493
// Try to remove AnnVolumeSnapshotBeingCreated if it is not removed yet for some reason
8594
_, err = ctrl.removeAnnVolumeSnapshotBeingCreated(content)
86-
return err
95+
if err != nil {
96+
return true, err
97+
}
98+
return false, nil
8799
}
88100
return ctrl.checkandUpdateContentStatus(content)
89101
}
@@ -98,39 +110,42 @@ func (ctrl *csiSnapshotSideCarController) storeContentUpdate(content interface{}
98110
return utils.StoreObjectUpdate(ctrl.contentStore, content, "content")
99111
}
100112

101-
// createSnapshot starts new asynchronous operation to create snapshot
102-
func (ctrl *csiSnapshotSideCarController) createSnapshot(content *crdv1.VolumeSnapshotContent) error {
113+
// createSnapshot starts new asynchronous operation to create snapshot. It returns flag indicating if the
114+
// content should be requeued. On error, the content is always requeued.
115+
func (ctrl *csiSnapshotSideCarController) createSnapshot(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
103116
klog.V(5).Infof("createSnapshot for content [%s]: started", content.Name)
104117
contentObj, err := ctrl.createSnapshotWrapper(content)
105118
if err != nil {
106119
ctrl.updateContentErrorStatusWithEvent(contentObj, v1.EventTypeWarning, "SnapshotCreationFailed", fmt.Sprintf("Failed to create snapshot: %v", err))
107120
klog.Errorf("createSnapshot for content [%s]: error occurred in createSnapshotWrapper: %v", content.Name, err)
108-
return err
121+
return true, err
109122
}
110123

111124
_, updateErr := ctrl.storeContentUpdate(contentObj)
112125
if updateErr != nil {
113126
// We will get an "snapshot update" event soon, this is not a big error
114127
klog.V(4).Infof("createSnapshot for content [%s]: cannot update internal content cache: %v", content.Name, updateErr)
115128
}
116-
return nil
129+
return !contentIsReady(contentObj), nil
117130
}
118131

119-
func (ctrl *csiSnapshotSideCarController) checkandUpdateContentStatus(content *crdv1.VolumeSnapshotContent) error {
132+
// checkandUpdateContentStatus checks status of the volume snapshot in CSI driver and updates content.status
133+
// accordingly. It returns flag indicating if the content should be requeued. On error, the content is
134+
// always requeued.
135+
func (ctrl *csiSnapshotSideCarController) checkandUpdateContentStatus(content *crdv1.VolumeSnapshotContent) (requeue bool, err error) {
120136
klog.V(5).Infof("checkandUpdateContentStatus[%s] started", content.Name)
121137
contentObj, err := ctrl.checkandUpdateContentStatusOperation(content)
122138
if err != nil {
123139
ctrl.updateContentErrorStatusWithEvent(contentObj, v1.EventTypeWarning, "SnapshotContentCheckandUpdateFailed", fmt.Sprintf("Failed to check and update snapshot content: %v", err))
124140
klog.Errorf("checkandUpdateContentStatus [%s]: error occurred %v", content.Name, err)
125-
return err
141+
return true, err
126142
}
127143
_, updateErr := ctrl.storeContentUpdate(contentObj)
128144
if updateErr != nil {
129145
// We will get an "snapshot update" event soon, this is not a big error
130146
klog.V(4).Infof("checkandUpdateContentStatus [%s]: cannot update internal cache: %v", content.Name, updateErr)
131147
}
132-
133-
return nil
148+
return !contentIsReady(contentObj), nil
134149
}
135150

136151
// updateContentStatusWithEvent saves new content.Status to API server and emits
@@ -380,6 +395,7 @@ func (ctrl *csiSnapshotSideCarController) deleteCSISnapshotOperation(content *cr
380395
return err
381396
}
382397
// trigger syncContent
398+
// TODO: just enqueue the content object instead of calling syncContent directly
383399
ctrl.updateContentInInformerCache(newContent)
384400
return nil
385401
}
@@ -689,3 +705,7 @@ func isCSIFinalError(err error) bool {
689705
// even start or failed. It is for sure not in progress.
690706
return true
691707
}
708+
709+
func contentIsReady(content *crdv1.VolumeSnapshotContent) bool {
710+
return content.Status != nil && content.Status.ReadyToUse != nil && *content.Status.ReadyToUse
711+
}

0 commit comments

Comments
 (0)