Skip to content

Commit 8526bf7

Browse files
committed
Fix VF creation race by waiting for VF symlinks before switchdev
Attempting to unbind VFs or switch the PF to switchdev mode before all VFs are ready causes race conditions, leading to mode switch failures with "invalid argument" errors. Signed-off-by: Ashok Pariya <ashok.pariya@ibm.com>
1 parent 11b5505 commit 8526bf7

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

pkg/host/internal/sriov/sriov.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,58 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin
11541154
return fn(pciAddr, desiredEswitchMode, numVFs)
11551155
}
11561156

1157+
func (s *sriov) waitForVFLinks(pciAddr string, expectedNum int, maxTimeout time.Duration) error {
1158+
// VF destruction is asynchronous and safe to ignore
1159+
if expectedNum == 0 {
1160+
log.Log.V(2).Info("waitForVFLinks(): skipping wait for cleanup (expected 0 VFs) - async destroy", "device", pciAddr)
1161+
return nil
1162+
}
1163+
1164+
log.Log.V(2).Info("waitForVFLinks(): waiting for VF symlinks",
1165+
"device", pciAddr,
1166+
"expected", expectedNum,
1167+
"maxTimeout", maxTimeout)
1168+
1169+
start := time.Now()
1170+
sleepDuration := 500 * time.Millisecond
1171+
1172+
for time.Since(start) < maxTimeout {
1173+
vfAddrs, err := s.dputilsLib.GetVFList(pciAddr)
1174+
if err != nil {
1175+
log.Log.V(2).Info("waitForVFLinks(): GetVFList failed, retrying", "err", err)
1176+
time.Sleep(sleepDuration)
1177+
continue
1178+
}
1179+
1180+
current := len(vfAddrs)
1181+
if current >= expectedNum {
1182+
// Check all have physfn symlink
1183+
allReady := true
1184+
for _, vfAddr := range vfAddrs {
1185+
linkPath := filepath.Join(vars.FilesystemRoot, consts.SysBusPciDevices, vfAddr, "physfn")
1186+
if _, err := os.Lstat(linkPath); err != nil {
1187+
log.Log.V(2).Info("waitForVFLinks(): physfn symlink missing", "vf", vfAddr, "err", err)
1188+
allReady = false
1189+
break
1190+
}
1191+
}
1192+
if allReady {
1193+
log.Log.V(2).Info("waitForVFLinks(): all expected VF symlinks ready")
1194+
return nil
1195+
}
1196+
}
1197+
1198+
time.Sleep(sleepDuration)
1199+
1200+
sleepDuration = time.Duration(float64(sleepDuration) * 1.5)
1201+
if sleepDuration > 5*time.Second {
1202+
sleepDuration = 5 * time.Second
1203+
}
1204+
}
1205+
1206+
return fmt.Errorf("timeout waiting for %d VF symlinks on %s (max %v)", expectedNum, pciAddr, maxTimeout)
1207+
}
1208+
11571209
// setEswitchModeAndNumVFsMlx configures PF eSwitch and sriov_numvfs in the following order:
11581210
// a. set eSwitchMode to legacy
11591211
// b. set the desired number of Virtual Functions
@@ -1182,6 +1234,9 @@ func (s *sriov) setEswitchModeAndNumVFsMlx(pciAddr string, desiredEswitchMode st
11821234
if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil {
11831235
return err
11841236
}
1237+
if err := s.waitForVFLinks(pciAddr, numVFs, 120*time.Second); err != nil {
1238+
return err
1239+
}
11851240

11861241
if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
11871242
if err := s.unbindAllVFsOnPF(pciAddr); err != nil {

pkg/host/internal/sriov/sriov_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ var _ = Describe("SRIOV", func() {
478478
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
479479
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
480480
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", syscall.EINVAL)
481-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
481+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
482482
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
483483
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
484484
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -547,7 +547,7 @@ var _ = Describe("SRIOV", func() {
547547
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
548548
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
549549
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", nil)
550-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
550+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
551551
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
552552
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
553553
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -616,7 +616,7 @@ var _ = Describe("SRIOV", func() {
616616
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
617617
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
618618
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("smfs", nil)
619-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(2)
619+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
620620
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
621621
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
622622
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
@@ -685,7 +685,7 @@ var _ = Describe("SRIOV", func() {
685685
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
686686
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
687687
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("test", nil)
688-
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(4)
688+
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).AnyTimes()
689689
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
690690
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
691691
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)

0 commit comments

Comments
 (0)