Skip to content

Commit de5e496

Browse files
committed
Use allocatedResources as new size for finishing volume expansion
on the node. Also add new tests for node only expansion modes
1 parent 201bdaa commit de5e496

File tree

6 files changed

+174
-45
lines changed

6 files changed

+174
-45
lines changed

pkg/volume/util/operationexecutor/node_expander.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ type NodeExpander struct {
3636

3737
// computed via precheck
3838
pvcStatusCap resource.Quantity
39-
pvCap resource.Quantity
4039
resizeStatus v1.ClaimResourceStatus
4140

4241
// indicates that if volume expansion failed on the node, then current expansion should be marked
@@ -76,7 +75,6 @@ type testResponseData struct {
7675
// it returns false.
7776
func (ne *NodeExpander) runPreCheck() bool {
7877
ne.pvcStatusCap = ne.pvc.Status.Capacity[v1.ResourceStorage]
79-
ne.pvCap = ne.pv.Spec.Capacity[v1.ResourceStorage]
8078

8179
allocatedResourceStatus := ne.pvc.Status.AllocatedResourceStatuses
8280
if currentStatus, ok := allocatedResourceStatus[v1.ResourceStorage]; ok {
@@ -120,6 +118,7 @@ func (ne *NodeExpander) runPreCheck() bool {
120118
func (ne *NodeExpander) expandOnPlugin() (bool, error, testResponseData) {
121119
allowExpansion := ne.runPreCheck()
122120
if !allowExpansion {
121+
klog.V(3).Infof("NodeExpandVolume is not allowed to proceed for volume %s with resizeStatus %s", ne.vmt.VolumeName, ne.resizeStatus)
123122
return false, nil, testResponseData{false, true}
124123
}
125124

pkg/volume/util/operationexecutor/operation_generator.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,6 +2009,9 @@ func (og *operationGenerator) expandVolumeDuringMount(volumeToMount VolumeToMoun
20092009
actualStateOfWorld: actualStateOfWorld,
20102010
}
20112011
if og.checkForRecoveryFromExpansion(pvc, volumeToMount) {
2012+
// if recovery feature is enabled, we can use allocated size from PVC status as new size
2013+
rsOpts.NewSize = pvc.Status.AllocatedResources[v1.ResourceStorage]
2014+
resizeOp.pluginResizeOpts = rsOpts
20122015
nodeExpander := newNodeExpander(resizeOp, og.kubeClient, og.recorder)
20132016
resizeFinished, err, _ := nodeExpander.expandOnPlugin()
20142017
return resizeFinished, err
@@ -2072,6 +2075,9 @@ func (og *operationGenerator) nodeExpandVolume(
20722075
}
20732076

20742077
if og.checkForRecoveryFromExpansion(pvc, volumeToMount) {
2078+
// if recovery feature is enabled, we can use allocated size from PVC status as new size
2079+
rsOpts.NewSize = pvc.Status.AllocatedResources[v1.ResourceStorage]
2080+
resizeOp.pluginResizeOpts = rsOpts
20752081
nodeExpander := newNodeExpander(resizeOp, og.kubeClient, og.recorder)
20762082
resizeFinished, err, _ := nodeExpander.expandOnPlugin()
20772083
return resizeFinished, err

test/e2e/storage/csimock/base.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ type testParameters struct {
9696
enableNodeExpansion bool // enable node expansion for CSI mock driver
9797
// just disable resizing on driver it overrides enableResizing flag for CSI mock driver
9898
disableResizingOnDriver bool
99+
disableControllerExpansion bool
99100
enableSnapshot bool
100101
enableVolumeMountGroup bool // enable the VOLUME_MOUNT_GROUP node capability in the CSI mock driver.
101102
enableNodeVolumeCondition bool
@@ -172,6 +173,7 @@ func (m *mockDriverSetup) init(ctx context.Context, tp testParameters) {
172173
EnableResizing: tp.enableResizing,
173174
EnableNodeExpansion: tp.enableNodeExpansion,
174175
EnableNodeVolumeCondition: tp.enableNodeVolumeCondition,
176+
DisableControllerExpansion: tp.disableControllerExpansion,
175177
EnableSnapshot: tp.enableSnapshot,
176178
EnableVolumeMountGroup: tp.enableVolumeMountGroup,
177179
TokenRequests: tp.tokenRequests,

test/e2e/storage/csimock/csi_volume_expansion.go

Lines changed: 128 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,10 @@ type expansionStatus int
4545

4646
const (
4747
expansionSuccess = iota
48-
expansionFailedOnController
49-
expansionFailedOnNode
48+
expansionFailedOnControllerWithInfeasibleError
49+
expansionFailedOnControllerWithFinalError
50+
expansionFailedOnNodeWithInfeasibleError
51+
expansionFailedOnNodeWithFinalError
5052
expansionFailedMissingStagingPath
5153
)
5254

@@ -61,12 +63,13 @@ var (
6163
)
6264

6365
type recoveryTest struct {
64-
name string
65-
pvcRequestSize string
66-
allocatedResource string
67-
simulatedCSIDriverError expansionStatus
68-
expectedResizeStatus v1.ClaimResourceStatus
69-
recoverySize resource.Quantity
66+
name string
67+
pvcRequestSize string
68+
allocatedResource string
69+
simulatedCSIDriverError expansionStatus
70+
disableControllerExpansion bool
71+
expectedResizeStatus v1.ClaimResourceStatus
72+
recoverySize resource.Quantity
7073
}
7174

7275
var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
@@ -400,35 +403,65 @@ var _ = utils.SIGDescribe("CSI Mock volume expansion", func() {
400403
f.Context("Expansion with recovery", feature.RecoverVolumeExpansionFailure, func() {
401404
tests := []recoveryTest{
402405
{
403-
name: "should record target size in allocated resources",
404-
pvcRequestSize: "4Gi",
405-
allocatedResource: "4Gi",
406-
simulatedCSIDriverError: expansionSuccess,
407-
expectedResizeStatus: "",
406+
name: "should record target size in allocated resources",
407+
pvcRequestSize: "4Gi",
408+
allocatedResource: "4Gi",
409+
disableControllerExpansion: false,
410+
simulatedCSIDriverError: expansionSuccess,
411+
expectedResizeStatus: "",
412+
},
413+
{
414+
name: "should allow recovery if controller expansion fails with infeasible error",
415+
pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller
416+
allocatedResource: "11Gi",
417+
disableControllerExpansion: false,
418+
simulatedCSIDriverError: expansionFailedOnControllerWithInfeasibleError,
419+
expectedResizeStatus: v1.PersistentVolumeClaimControllerResizeInfeasible,
420+
recoverySize: resource.MustParse("4Gi"),
408421
},
409422
{
410-
name: "should allow recovery if controller expansion fails with final error",
411-
pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller
412-
allocatedResource: "11Gi",
413-
simulatedCSIDriverError: expansionFailedOnController,
414-
expectedResizeStatus: v1.PersistentVolumeClaimControllerResizeInfeasible,
415-
recoverySize: resource.MustParse("4Gi"),
423+
name: "should allow recovery if controller expansion fails with final error",
424+
pvcRequestSize: "11Gi", // expansion to 11Gi will cause expansion to fail on controller
425+
allocatedResource: "11Gi",
426+
disableControllerExpansion: false,
427+
simulatedCSIDriverError: expansionFailedOnControllerWithFinalError,
428+
expectedResizeStatus: v1.PersistentVolumeClaimControllerResizeInProgress,
429+
recoverySize: resource.MustParse("4Gi"),
416430
},
417431
{
418-
name: "recovery should not be possible in partially expanded volumes",
419-
pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node
420-
allocatedResource: "9Gi",
421-
simulatedCSIDriverError: expansionFailedOnNode,
422-
expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible,
423-
recoverySize: resource.MustParse("5Gi"),
432+
name: "recovery should not be possible in partially expanded volumes",
433+
pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node
434+
allocatedResource: "9Gi",
435+
disableControllerExpansion: false,
436+
simulatedCSIDriverError: expansionFailedOnNodeWithInfeasibleError,
437+
expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible,
438+
recoverySize: resource.MustParse("5Gi"),
439+
},
440+
{
441+
name: "recovery should be possible for node-only expanded volumes with infeasible error",
442+
pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node
443+
allocatedResource: "9Gi",
444+
disableControllerExpansion: true,
445+
simulatedCSIDriverError: expansionFailedOnNodeWithInfeasibleError,
446+
expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInfeasible,
447+
recoverySize: resource.MustParse("5Gi"),
448+
},
449+
{
450+
name: "recovery should be possible for node-only expanded volumes with final error",
451+
pvcRequestSize: "9Gi", // expansion to 9Gi will cause expansion to fail on node
452+
allocatedResource: "9Gi",
453+
disableControllerExpansion: true,
454+
simulatedCSIDriverError: expansionFailedOnNodeWithFinalError,
455+
expectedResizeStatus: v1.PersistentVolumeClaimNodeResizeInProgress,
456+
recoverySize: resource.MustParse("5Gi"),
424457
},
425458
}
426459

427460
for _, t := range tests {
428461
test := t
429462
ginkgo.It(test.name, func(ctx context.Context) {
430463
var err error
431-
params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true}
464+
params := testParameters{enableResizing: true, enableNodeExpansion: true, enableRecoverExpansionFailure: true, disableControllerExpansion: test.disableControllerExpansion}
432465

433466
if test.simulatedCSIDriverError != expansionSuccess {
434467
params.hooks = createExpansionHook(test.simulatedCSIDriverError)
@@ -476,9 +509,15 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
476509
err = waitForAllocatedResource(ctx, pvc, m, test.allocatedResource)
477510
framework.ExpectNoError(err, "While waiting for allocated resource to be updated")
478511

479-
ginkgo.By("Waiting for resizer to set resize status")
480-
err = waitForResizeStatus(ctx, pvc, m.cs, test.expectedResizeStatus)
481-
framework.ExpectNoError(err, "While waiting for resize status to be set")
512+
if test.expectedResizeStatus == v1.PersistentVolumeClaimNodeResizeInfeasible {
513+
ginkgo.By("Waiting for kubelet to fail expansion on the node")
514+
err = waitForResizeToFailOnNode(ctx, pvc, m.cs)
515+
framework.ExpectNoError(err, "While waiting for resize status to be set")
516+
} else {
517+
ginkgo.By("Waiting for resizer to set resize status")
518+
err = waitForResizeStatus(ctx, pvc, m.cs, test.expectedResizeStatus)
519+
framework.ExpectNoError(err, "While waiting for resize status to be set")
520+
}
482521

483522
ginkgo.By("Recover pvc size")
484523
newPVC, err := testsuites.ExpandPVCSize(ctx, pvc, test.recoverySize, m.cs)
@@ -492,16 +531,25 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
492531
}
493532

494533
// if expansion failed on controller with final error, then recovery should be possible
495-
if test.simulatedCSIDriverError == expansionFailedOnController {
534+
if test.simulatedCSIDriverError == expansionFailedOnControllerWithInfeasibleError {
535+
validateExpansionSuccess(ctx, pvc, m, test, test.recoverySize.String())
536+
return
537+
}
538+
539+
// if expansion failed on node with final error but volume was only expanded on the node
540+
// then recovery should be possible
541+
if test.disableControllerExpansion &&
542+
(test.simulatedCSIDriverError == expansionFailedOnNodeWithInfeasibleError ||
543+
test.simulatedCSIDriverError == expansionFailedOnNodeWithFinalError) {
496544
validateExpansionSuccess(ctx, pvc, m, test, test.recoverySize.String())
497545
return
498546
}
499547

500548
// if expansion succeeded on controller but failed on the node
501-
if test.simulatedCSIDriverError == expansionFailedOnNode {
549+
if test.simulatedCSIDriverError == expansionFailedOnNodeWithInfeasibleError {
502550
ginkgo.By("Wait for expansion to fail on node again")
503-
err = waitForResizeStatus(ctx, pvc, m.cs, v1.PersistentVolumeClaimNodeResizeInfeasible)
504-
framework.ExpectNoError(err, "While waiting for resize status to be set to expansion-failed-on-node")
551+
err = waitForResizeToFailOnNode(ctx, pvc, m.cs)
552+
framework.ExpectNoError(err, "While waiting for resize status to be set")
505553

506554
ginkgo.By("verify allocated resources after recovery")
507555
pvc, err = m.cs.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{})
@@ -520,11 +568,11 @@ func validateRecoveryBehaviour(ctx context.Context, pvc *v1.PersistentVolumeClai
520568

521569
func validateExpansionSuccess(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, test recoveryTest, expectedAllocatedSize string) {
522570
var err error
523-
ginkgo.By("Waiting for persistent volume resize to finish")
524-
err = testsuites.WaitForControllerVolumeResize(ctx, pvc, m.cs, csiResizeWaitPeriod)
571+
ginkgo.By(fmt.Sprintf("Waiting for PV %s to be expanded to %s", pvc.Spec.VolumeName, test.recoverySize.String()))
572+
err = testsuites.WaitForRecoveryPVSize(pvc, m.cs, csiResizeWaitPeriod)
525573
framework.ExpectNoError(err, "While waiting for PV resize to finish")
526574

527-
ginkgo.By("Waiting for PVC resize to finish")
575+
ginkgo.By(fmt.Sprintf("Waiting for PVC %s to be expanded to %s", pvc.Name, test.recoverySize.String()))
528576
pvc, err = testsuites.WaitForFSResize(ctx, pvc, m.cs)
529577
framework.ExpectNoError(err, "while waiting for PVC to finish")
530578

@@ -561,6 +609,31 @@ func waitForResizeStatus(ctx context.Context, pvc *v1.PersistentVolumeClaim, c c
561609
return nil
562610
}
563611

612+
func waitForResizeToFailOnNode(ctx context.Context, pvc *v1.PersistentVolumeClaim, c clientset.Interface) error {
613+
var finalConditions []v1.PersistentVolumeClaimCondition
614+
waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) {
615+
var err error
616+
updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pollContext, pvc.Name, metav1.GetOptions{})
617+
618+
if err != nil {
619+
return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %w", pvc.Name, err)
620+
}
621+
pvcConditions := updatedPVC.Status.Conditions
622+
for _, cond := range pvcConditions {
623+
if cond.Type == v1.PersistentVolumeClaimNodeResizeError {
624+
return true, nil
625+
}
626+
}
627+
finalConditions = pvcConditions
628+
return false, nil
629+
})
630+
631+
if waitErr != nil {
632+
return fmt.Errorf("error while waiting for resize condition sync to NodeResizeError, actualStatus %+v: %w", finalConditions, waitErr)
633+
}
634+
return nil
635+
}
636+
564637
func waitForAllocatedResource(ctx context.Context, pvc *v1.PersistentVolumeClaim, m *mockDriverSetup, expectedSize string) error {
565638
expectedQuantity := resource.MustParse(expectedSize)
566639
waitErr := wait.PollUntilContextTimeout(ctx, resizePollInterval, csiResizeWaitPeriod, true, func(pollContext context.Context) (bool, error) {
@@ -596,15 +669,24 @@ func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks
596669
}
597670

598671
}
599-
case expansionFailedOnController:
672+
case expansionFailedOnControllerWithInfeasibleError:
600673
expansionRequest, ok := request.(*csipbv1.ControllerExpandVolumeRequest)
601674
if ok {
602675
requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
603676
if requestedSize.Cmp(maxControllerSizeLimit) > 0 {
604677
return nil, status.Error(codes.InvalidArgument, "invalid expansion request")
605678
}
606679
}
607-
case expansionFailedOnNode:
680+
case expansionFailedOnControllerWithFinalError:
681+
// This simulates a condition that a final, but not infeasible error is returned when expansion fails in the controller.
682+
expansionRequest, ok := request.(*csipbv1.ControllerExpandVolumeRequest)
683+
if ok {
684+
requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
685+
if requestedSize.Cmp(maxControllerSizeLimit) > 0 {
686+
return nil, status.Error(codes.PermissionDenied, "permission denied for expansion")
687+
}
688+
}
689+
case expansionFailedOnNodeWithInfeasibleError:
608690
expansionRequest, ok := request.(*csipbv1.NodeExpandVolumeRequest)
609691
if ok {
610692
requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
@@ -613,6 +695,14 @@ func createExpansionHook(expectedExpansionStatus expansionStatus) *drivers.Hooks
613695
}
614696

615697
}
698+
case expansionFailedOnNodeWithFinalError:
699+
expansionRequest, ok := request.(*csipbv1.NodeExpandVolumeRequest)
700+
if ok {
701+
requestedSize := resource.NewQuantity(expansionRequest.CapacityRange.RequiredBytes, resource.BinarySI)
702+
if requestedSize.Cmp(maxNodeExpansionLimit) > 0 {
703+
return nil, status.Error(codes.PermissionDenied, "permission denied for expansion")
704+
}
705+
}
616706
}
617707

618708
return nil, nil

test/e2e/storage/drivers/csi.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ type mockCSIDriver struct {
350350
embeddedCSIDriver *mockdriver.CSIDriver
351351
enableSELinuxMount *bool
352352
enableRecoverExpansionFailure bool
353+
disableControllerExpansion bool
353354
enableHonorPVReclaimPolicy bool
354355

355356
// Additional values set during PrepareTest
@@ -392,6 +393,7 @@ type CSIMockDriverOpts struct {
392393
EnableTopology bool
393394
EnableResizing bool
394395
EnableNodeExpansion bool
396+
DisableControllerExpansion bool
395397
EnableSnapshot bool
396398
EnableVolumeMountGroup bool
397399
EnableNodeVolumeCondition bool
@@ -550,6 +552,7 @@ func InitMockCSIDriver(driverOpts CSIMockDriverOpts) MockCSITestDriver {
550552
attachLimit: driverOpts.AttachLimit,
551553
enableNodeExpansion: driverOpts.EnableNodeExpansion,
552554
enableNodeVolumeCondition: driverOpts.EnableNodeVolumeCondition,
555+
disableControllerExpansion: driverOpts.DisableControllerExpansion,
553556
tokenRequests: driverOpts.TokenRequests,
554557
requiresRepublish: driverOpts.RequiresRepublish,
555558
fsGroupPolicy: driverOpts.FSGroupPolicy,
@@ -628,6 +631,7 @@ func (m *mockCSIDriver) PrepareTest(ctx context.Context, f *framework.Framework)
628631
DriverName: "csi-mock-" + f.UniqueName,
629632
AttachLimit: int64(m.attachLimit),
630633
NodeExpansionRequired: m.enableNodeExpansion,
634+
DisableControllerExpansion: m.disableControllerExpansion,
631635
NodeVolumeConditionRequired: m.enableNodeVolumeCondition,
632636
VolumeMountGroupRequired: m.enableVolumeMountGroup,
633637
EnableTopology: m.enableTopology,

0 commit comments

Comments
 (0)