Skip to content

Commit 24ce5c6

Browse files
authored
wait for iscsi device removal
Replaces static sleep with a wait with timeout when deleting a device
1 parent dfa21af commit 24ce5c6

File tree

3 files changed

+120
-6
lines changed

3 files changed

+120
-6
lines changed

utils/devices.go

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strings"
1313
"time"
1414

15+
"github.com/spf13/afero"
1516
"golang.org/x/net/context"
1617

1718
"github.com/netapp/trident/internal/fiji"
@@ -26,7 +27,8 @@ import (
2627
const LUKSMetadataSize = 18874368
2728

2829
const (
29-
luksDevicePrefix = "luks-"
30+
luksDevicePrefix = "luks-"
31+
devicesRemovalMaxWaitTime = 5 * time.Second
3032
)
3133

3234
var (
@@ -35,6 +37,8 @@ var (
3537
beforeRemoveFile = fiji.Register("beforeRemoveFile", "devices")
3638

3739
LuksCloseDurations = durations.TimeDuration{}
40+
41+
osFs = afero.NewOsFs()
3842
)
3943

4044
// waitForDevice accepts a device name and checks if it is present
@@ -175,6 +179,30 @@ func removeDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ignore
175179
return nil
176180
}
177181

182+
// waitForDevicesRemoval waits for devices to be removed from the system.
183+
func waitForDevicesRemoval(ctx context.Context, osFs afero.Fs, devicePathPrefix string, deviceNames []string,
184+
maxWaitTime time.Duration,
185+
) error {
186+
startTime := time.Now()
187+
for time.Since(startTime) < maxWaitTime {
188+
anyExist := false
189+
for _, device := range deviceNames {
190+
path := filepath.Join(devicePathPrefix, device)
191+
if _, err := osFs.Stat(path); !os.IsNotExist(err) {
192+
anyExist = true
193+
break
194+
}
195+
}
196+
if !anyExist {
197+
return nil
198+
}
199+
time.Sleep(50 * time.Millisecond)
200+
}
201+
202+
Logc(ctx).WithField("devices", deviceNames).Debug("Timed out waiting for devices to be removed.")
203+
return errors.TimeoutError("timed out waiting for devices to be removed")
204+
}
205+
178206
// canFlushMultipathDevice determines whether device can be flushed.
179207
// 1. Check the health of path by executing 'multipath -C <devicePath>'
180208
// 2. If no error, return nil.
@@ -911,8 +939,13 @@ func removeSCSIDevice(ctx context.Context, deviceInfo *models.ScsiDeviceInfo, ig
911939
return false, err
912940
}
913941

914-
// Give the host a chance to fully process the removal
915-
time.Sleep(time.Second)
942+
// Wait for device to be removed. Do not ignore errors here as we need the device removed
943+
// for the force removal of the multipath device to succeed.
944+
err = waitForDevicesRemoval(ctx, osFs, iscsi.DevPrefix, deviceInfo.Devices, devicesRemovalMaxWaitTime)
945+
if err != nil {
946+
return false, err
947+
}
948+
916949
listAllISCSIDevices(ctx)
917950

918951
// If ignoreErrors was set to true while entering into this function and

utils/devices_linux.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
"unsafe"
1515

1616
log "github.com/sirupsen/logrus"
17-
"github.com/spf13/afero"
1817
"golang.org/x/net/context"
1918
"golang.org/x/sys/unix"
2019

@@ -49,8 +48,6 @@ var (
4948
duringOpenBeforeCryptSetupOpen = fiji.Register("duringOpenBeforeCryptSetupOpen", "devices_linux")
5049
duringRotatePassphraseBeforeLuksKeyChange = fiji.Register("duringRotatePassphraseBeforeLuksKeyChange",
5150
"devices_linux")
52-
53-
osFs = afero.NewOsFs()
5451
)
5552

5653
// flushOneDevice flushes any outstanding I/O to a disk

utils/devices_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ import (
88
"testing"
99
"time"
1010

11+
"github.com/spf13/afero"
1112
"github.com/stretchr/testify/assert"
1213
"go.uber.org/mock/gomock"
1314

1415
mockexec "github.com/netapp/trident/mocks/mock_utils/mock_exec"
1516
"github.com/netapp/trident/mocks/mock_utils/mock_models/mock_luks"
17+
"github.com/netapp/trident/utils/errors"
1618
)
1719

1820
// ////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -266,3 +268,85 @@ func TestRemoveMultipathDeviceMapping(t *testing.T) {
266268
})
267269
}
268270
}
271+
272+
func TestWaitForDevicesRemoval(t *testing.T) {
273+
errMsg := "timed out waiting for devices to be removed"
274+
tests := map[string]struct {
275+
name string
276+
devicePathPrefix string
277+
deviceNames []string
278+
getOsFs func() (afero.Fs, error)
279+
maxWaitTime time.Duration
280+
expectedError error
281+
}{
282+
"Devices removed successfully": {
283+
devicePathPrefix: "/dev",
284+
deviceNames: []string{"sda", "sdb"},
285+
getOsFs: func() (afero.Fs, error) {
286+
return afero.NewMemMapFs(), nil
287+
},
288+
maxWaitTime: 1 * time.Second,
289+
expectedError: nil,
290+
},
291+
"Timeout waiting for devices to be removed": {
292+
devicePathPrefix: "/dev",
293+
deviceNames: []string{"sda", "sdb"},
294+
getOsFs: func() (afero.Fs, error) {
295+
osFs := afero.NewMemMapFs()
296+
_, err := osFs.Create("/dev/sda")
297+
if err != nil {
298+
return nil, err
299+
}
300+
_, err = osFs.Create("/dev/sdb")
301+
if err != nil {
302+
return nil, err
303+
}
304+
return osFs, nil
305+
},
306+
maxWaitTime: 1 * time.Second,
307+
expectedError: errors.TimeoutError(errMsg),
308+
},
309+
"Timeout waiting for last device to be removed": {
310+
devicePathPrefix: "/dev",
311+
deviceNames: []string{"sda", "sdb"},
312+
getOsFs: func() (afero.Fs, error) {
313+
osFs := afero.NewMemMapFs()
314+
_, err := osFs.Create("/dev/sdb")
315+
if err != nil {
316+
return nil, err
317+
}
318+
return osFs, nil
319+
},
320+
maxWaitTime: 1 * time.Second,
321+
expectedError: errors.TimeoutError(errMsg),
322+
},
323+
"Timeout waiting for first device to be removed": {
324+
devicePathPrefix: "/dev",
325+
deviceNames: []string{"sda", "sdb"},
326+
getOsFs: func() (afero.Fs, error) {
327+
osFs := afero.NewMemMapFs()
328+
_, err := osFs.Create("/dev/sda")
329+
if err != nil {
330+
return nil, err
331+
}
332+
return osFs, nil
333+
},
334+
maxWaitTime: 1 * time.Second,
335+
expectedError: errors.TimeoutError(errMsg),
336+
},
337+
}
338+
339+
for name, params := range tests {
340+
t.Run(name, func(t *testing.T) {
341+
fs, err := params.getOsFs()
342+
assert.NoError(t, err)
343+
err = waitForDevicesRemoval(context.Background(), fs, params.devicePathPrefix, params.deviceNames,
344+
params.maxWaitTime)
345+
if params.expectedError != nil {
346+
assert.EqualError(t, err, params.expectedError.Error())
347+
} else {
348+
assert.NoError(t, err)
349+
}
350+
})
351+
}
352+
}

0 commit comments

Comments
 (0)