Skip to content

Commit f573809

Browse files
authored
Merge pull request #3355 from landreasyan/validate-bd-size-before-expand
fix: validating Block Device Size Before Resizing FileSystem
2 parents d5fb169 + 9000e0c commit f573809

File tree

4 files changed

+146
-26
lines changed

4 files changed

+146
-26
lines changed

pkg/azuredisk/azure_controller_common_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,17 +1276,17 @@ func TestConcurrentDetachDisk(t *testing.T) {
12761276
// Simulate concurrent detach requests that would be batched
12771277
wg := &sync.WaitGroup{}
12781278
errorChan := make(chan error, 1)
1279-
for i := 0; i < 2; i++ {
1279+
for i := range 2 {
12801280
diskURI := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/%s",
12811281
testCloud.SubscriptionID, testCloud.ResourceGroup, fmt.Sprintf("disk-batched-%d", i))
12821282
wg.Add(1)
1283-
go func() {
1283+
go func(i int, diskURI string) {
12841284
defer wg.Done()
12851285
err := common.DetachDisk(ctx, fmt.Sprintf("disk-batched-%d", i), diskURI, "vm1")
12861286
if err != nil {
12871287
errorChan <- err
12881288
}
1289-
}()
1289+
}(i, diskURI)
12901290
}
12911291

12921292
time.Sleep(1005 * time.Millisecond) // Wait for the batching timeout

pkg/azuredisk/fake_azuredisk.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ type FakeDriver interface {
103103
setThrottlingCache(key string, value string)
104104
getUsedLunsFromVolumeAttachments(context.Context, string) ([]int, error)
105105
getUsedLunsFromNode(context.Context, types.NodeName) ([]int, error)
106+
validateBlockDeviceSize(devicePath string, requestGiB int64) (int64, error)
106107
}
107108

108109
type fakeDriver struct {

pkg/azuredisk/nodeserver.go

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -538,34 +538,43 @@ func (d *Driver) NodeExpandVolume(_ context.Context, req *csi.NodeExpandVolumeRe
538538
}
539539
}
540540

541+
// CRITICAL SAFETY CHECK: Verify block device size BEFORE filesystem resize
542+
// This prevents filesystem corruption if the underlying disk hasn't been expanded yet
543+
actualDevicePath := devicePath
544+
if runtime.GOOS == "windows" && d.enableWindowsHostProcess {
545+
// in windows host process mode, this driver could get the volume size from the volume path
546+
actualDevicePath = volumePath
547+
}
548+
549+
currentBlockSizeBytes, err := d.validateBlockDeviceSize(actualDevicePath, requestGiB)
550+
if err != nil {
551+
return nil, status.Errorf(codes.FailedPrecondition, "NodeExpandVolume: block device size did not match requested size: %v", err)
552+
}
553+
554+
klog.V(2).Infof("NodeExpandVolume: block device size verified (%d bytes >= %d GiB requested), proceeding with filesystem resize",
555+
currentBlockSizeBytes, requestGiB)
556+
557+
// Now safe to resize filesystem since we've verified the block device is large enough
541558
var retErr error
542559
if err := resizeVolume(devicePath, volumePath, d.mounter); err != nil {
543560
retErr = status.Errorf(codes.Internal, "could not resize volume %q (%q): %v", volumeID, devicePath, err)
544561
klog.Errorf("%v, will continue checking whether the volume has been resized", retErr)
545562
}
546563

547-
if runtime.GOOS == "windows" && d.enableWindowsHostProcess {
548-
// in windows host process mode, this driver could get the volume size from the volume path
549-
devicePath = volumePath
550-
}
551-
gotBlockSizeBytes, err := getBlockSizeBytes(devicePath, d.mounter)
564+
// Final verification after filesystem resize
565+
finalBlockSizeBytes, err := d.validateBlockDeviceSize(actualDevicePath, requestGiB)
552566
if err != nil {
553-
return nil, status.Error(codes.Internal, fmt.Sprintf("could not get size of block volume at path %s: %v", devicePath, err))
554-
}
555-
gotBlockGiB := volumehelper.RoundUpGiB(gotBlockSizeBytes)
556-
if gotBlockGiB < requestGiB {
557567
if retErr != nil {
558568
return nil, retErr
559569
}
560-
// Because size was rounded up, getting more size than requested will be a success.
561-
return nil, status.Errorf(codes.Internal, "resize requested for %v, but after resizing volume size was %v", requestGiB, gotBlockGiB)
570+
return nil, status.Errorf(codes.Internal, "NodeExpandVolume: block device size did not match requested size after filesystem resize: %v", err)
562571
}
563572

564-
klog.V(2).Infof("NodeExpandVolume succeeded on resizing volume %v to %v", volumeID, gotBlockSizeBytes)
573+
klog.V(2).Infof("NodeExpandVolume succeeded on resizing volume %v to %v", volumeID, finalBlockSizeBytes)
565574

566575
isOperationSucceeded = true
567576
return &csi.NodeExpandVolumeResponse{
568-
CapacityBytes: gotBlockSizeBytes,
577+
CapacityBytes: finalBlockSizeBytes,
569578
}, nil
570579
}
571580

@@ -682,6 +691,23 @@ func (d *Driver) ensureBlockTargetFile(target string) error {
682691
return nil
683692
}
684693

694+
// validateBlockDeviceSize checks if the block device is large enough for the requested size
695+
// Returns the actual block size in bytes if validation passes, otherwise returns an error
696+
func (d *Driver) validateBlockDeviceSize(devicePath string, requestGiB int64) (int64, error) {
697+
blockSizeBytes, err := getBlockSizeBytes(devicePath, d.mounter)
698+
if err != nil {
699+
return 0, status.Error(codes.Internal, fmt.Sprintf("could not get size of block volume at path %s: %v", devicePath, err))
700+
}
701+
702+
blockGiB := volumehelper.RoundUpGiB(blockSizeBytes)
703+
704+
if blockGiB < requestGiB {
705+
return 0, status.Error(codes.Internal, fmt.Sprintf("block volume at path %s size check failed: current %d GiB < requested %d GiB", devicePath, blockGiB, requestGiB))
706+
}
707+
708+
return blockSizeBytes, nil
709+
}
710+
685711
func collectMountOptions(fsType string, mntFlags []string) []string {
686712
var options []string
687713
options = append(options, mntFlags...)

pkg/azuredisk/nodeserver_test.go

Lines changed: 103 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,15 +1058,15 @@ func TestNodeExpandVolume(t *testing.T) {
10581058
WindowsError: status.Errorf(codes.NotFound, "error reading link for mount D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test. target err: readlink D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test: The file or directory is not a reparse point."),
10591059
}
10601060
blockSizeErr := testutil.TestError{
1061-
DefaultError: status.Error(codes.Internal, "could not get size of block volume at path test: error when getting size of block volume at path test: output: , err: exit status 1"),
1061+
DefaultError: status.Error(codes.FailedPrecondition, "NodeExpandVolume: block device size did not match requested size: rpc error: code = Internal desc = block volume at path test size check failed: current 0 GiB < requested 15 GiB"),
10621062
WindowsError: status.Errorf(codes.NotFound, "error reading link for mount D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test. target err: readlink D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test: The file or directory is not a reparse point."),
10631063
}
10641064
resizeErr := testutil.TestError{
10651065
DefaultError: status.Errorf(codes.Internal, "could not resize volume \"test\" (\"test\"): resize of device test failed: %v. resize2fs output: ", notFoundErr),
10661066
WindowsError: status.Errorf(codes.NotFound, "error reading link for mount D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test. target err: readlink D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test: The file or directory is not a reparse point."),
10671067
}
10681068
sizeTooSmallErr := testutil.TestError{
1069-
DefaultError: status.Errorf(codes.Internal, "resize requested for %v, but after resizing volume size was %v", volumehelper.RoundUpGiB(stdCapacityRange.RequiredBytes), volumehelper.RoundUpGiB(stdCapacityRange.RequiredBytes/2)),
1069+
DefaultError: status.Error(codes.Internal, "NodeExpandVolume: block device size did not match requested size after filesystem resize: rpc error: code = Internal desc = block volume at path test size check failed: current 8 GiB < requested 15 GiB"),
10701070
WindowsError: status.Errorf(codes.NotFound, "error reading link for mount D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test. target err: readlink D:\\a\\azuredisk-csi-driver\\azuredisk-csi-driver\\pkg\\azuredisk\\target_test: The file or directory is not a reparse point."),
10711071
}
10721072

@@ -1091,6 +1091,10 @@ func TestNodeExpandVolume(t *testing.T) {
10911091
blockdevAction := func() ([]byte, []byte, error) {
10921092
return []byte(fmt.Sprintf("%d", stdCapacityRange.RequiredBytes)), []byte{}, nil
10931093
}
1094+
// Action that returns 0 bytes (invalid block size)
1095+
blockdevZeroAction := func() ([]byte, []byte, error) {
1096+
return []byte("0"), []byte{}, nil
1097+
}
10941098

10951099
tests := []struct {
10961100
desc string
@@ -1148,7 +1152,7 @@ func TestNodeExpandVolume(t *testing.T) {
11481152
},
11491153
expectedErr: blockSizeErr,
11501154
skipOnDarwin: true, // ResizeFs not supported on Darwin
1151-
outputScripts: []testingexec.FakeAction{findmntAction, blkidAction, resize2fsAction, notFoundErrAction},
1155+
outputScripts: []testingexec.FakeAction{findmntAction, blockdevZeroAction},
11521156
},
11531157
{
11541158
desc: "Resize failure",
@@ -1158,9 +1162,10 @@ func TestNodeExpandVolume(t *testing.T) {
11581162
VolumeId: "test",
11591163
StagingTargetPath: "test",
11601164
},
1161-
expectedErr: resizeErr,
1162-
skipOnDarwin: true, // ResizeFs not supported on Darwin
1163-
outputScripts: []testingexec.FakeAction{findmntAction, blkidAction, resize2fsFailedAction, blockdevSizeTooSmallAction},
1165+
expectedErr: resizeErr,
1166+
skipOnDarwin: true, // ResizeFs not supported on Darwin
1167+
// First blockdev call succeeds (pre-resize validation), blkid checks filesystem, resize2fs fails, second blockdev for post-resize validation
1168+
outputScripts: []testingexec.FakeAction{findmntAction, blockdevAction, blkidAction, resize2fsFailedAction, blockdevSizeTooSmallAction},
11641169
},
11651170
{
11661171
desc: "Resize too small failure",
@@ -1170,9 +1175,10 @@ func TestNodeExpandVolume(t *testing.T) {
11701175
VolumeId: "test",
11711176
StagingTargetPath: "test",
11721177
},
1173-
expectedErr: sizeTooSmallErr,
1174-
skipOnDarwin: true, // ResizeFs not supported on Darwin
1175-
outputScripts: []testingexec.FakeAction{findmntAction, blkidAction, resize2fsAction, blockdevSizeTooSmallAction},
1178+
expectedErr: sizeTooSmallErr,
1179+
skipOnDarwin: true, // ResizeFs not supported on Darwin
1180+
// First blockdev call succeeds (pre-resize validation), resize2fs succeeds, second blockdev shows size too small
1181+
outputScripts: []testingexec.FakeAction{findmntAction, blockdevAction, blkidAction, resize2fsAction, blockdevSizeTooSmallAction},
11761182
},
11771183
{
11781184
desc: "Successfully expanded",
@@ -1184,7 +1190,8 @@ func TestNodeExpandVolume(t *testing.T) {
11841190
},
11851191
skipOnWindows: true,
11861192
skipOnDarwin: true, // ResizeFs not supported on Darwin
1187-
outputScripts: []testingexec.FakeAction{findmntAction, blkidAction, resize2fsAction, blockdevAction},
1193+
// First blockdev call (pre-resize validation), resize2fs succeeds, second blockdev call (post-resize validation)
1194+
outputScripts: []testingexec.FakeAction{findmntAction, blockdevAction, blkidAction, resize2fsAction, blockdevAction},
11881195
},
11891196
{
11901197
desc: "Block volume expansion",
@@ -1475,3 +1482,89 @@ func TestNodePublishVolumeIdempotentMount(t *testing.T) {
14751482
err = os.RemoveAll(targetTest)
14761483
assert.NoError(t, err)
14771484
}
1485+
1486+
func TestValidateBlockDeviceSize(t *testing.T) {
1487+
cntl := gomock.NewController(t)
1488+
defer cntl.Finish()
1489+
1490+
d, _ := NewFakeDriver(cntl)
1491+
fakeMounter, err := mounter.NewFakeSafeMounter()
1492+
assert.NoError(t, err)
1493+
d.setMounter(fakeMounter)
1494+
1495+
notFoundErr := errors.New("exit status 1")
1496+
blockdevAction := func() ([]byte, []byte, error) {
1497+
return []byte(fmt.Sprintf("%d", volumehelper.GiBToBytes(20))), []byte{}, nil
1498+
}
1499+
blockdevSmallAction := func() ([]byte, []byte, error) {
1500+
return []byte(fmt.Sprintf("%d", volumehelper.GiBToBytes(5))), []byte{}, nil
1501+
}
1502+
blockdevErrorAction := func() ([]byte, []byte, error) {
1503+
return []byte{}, []byte{}, notFoundErr
1504+
}
1505+
1506+
tests := []struct {
1507+
desc string
1508+
devicePath string
1509+
requestGiB int64
1510+
outputScript testingexec.FakeAction
1511+
expectedErr error
1512+
expectedBytes int64
1513+
skipOnWindows bool
1514+
}{
1515+
{
1516+
desc: "Error getting block size",
1517+
devicePath: "/dev/sda",
1518+
requestGiB: 10,
1519+
outputScript: blockdevErrorAction,
1520+
expectedErr: status.Error(codes.Internal, fmt.Sprintf("could not get size of block volume at path %s: %v", "/dev/sda", fmt.Errorf("error when getting size of block volume at path /dev/sda: output: , err: exit status 1"))),
1521+
skipOnWindows: true,
1522+
},
1523+
{
1524+
desc: "Block size too small",
1525+
devicePath: "/dev/sdb",
1526+
requestGiB: 10,
1527+
outputScript: blockdevSmallAction,
1528+
expectedErr: status.Error(codes.Internal, fmt.Sprintf("block volume at path %s size check failed: current %d GiB < requested %d GiB", "/dev/sdb", 5, 10)),
1529+
skipOnWindows: true,
1530+
},
1531+
{
1532+
desc: "Successful validation",
1533+
devicePath: "/dev/sdc",
1534+
requestGiB: 10,
1535+
outputScript: blockdevAction,
1536+
expectedErr: nil,
1537+
expectedBytes: volumehelper.GiBToBytes(20),
1538+
skipOnWindows: true,
1539+
},
1540+
{
1541+
desc: "Exact size match",
1542+
devicePath: "/dev/sdd",
1543+
requestGiB: 20,
1544+
outputScript: blockdevAction,
1545+
expectedErr: nil,
1546+
expectedBytes: volumehelper.GiBToBytes(20),
1547+
skipOnWindows: true,
1548+
},
1549+
}
1550+
1551+
for _, test := range tests {
1552+
t.Run(test.desc, func(t *testing.T) {
1553+
if test.skipOnWindows && runtime.GOOS == "windows" {
1554+
t.Skip("Skipping test on Windows")
1555+
}
1556+
1557+
d.setNextCommandOutputScripts(test.outputScript)
1558+
1559+
actualBytes, err := d.validateBlockDeviceSize(test.devicePath, test.requestGiB)
1560+
1561+
if test.expectedErr == nil {
1562+
assert.NoError(t, err)
1563+
assert.Equal(t, test.expectedBytes, actualBytes)
1564+
} else {
1565+
assert.EqualError(t, err, test.expectedErr.Error())
1566+
assert.Equal(t, int64(0), actualBytes)
1567+
}
1568+
})
1569+
}
1570+
}

0 commit comments

Comments
 (0)