Skip to content

Commit 98932a1

Browse files
authored
Merge pull request #422 from yussufsh/test
reduce multipathd usage as it times out when there is a large number of disks (OCPBUGS-16878)
2 parents ac90847 + 6314235 commit 98932a1

File tree

5 files changed

+119
-394
lines changed

5 files changed

+119
-394
lines changed

pkg/device/device.go

Lines changed: 39 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@ import (
3030
)
3131

3232
var (
33-
lastCleanExecuted time.Time
34-
lastStaleCleanExecuted time.Time
35-
scanLock = &sync.Mutex{}
33+
scanLock = &sync.Mutex{}
3634
)
3735

3836
type LinuxDevice interface {
@@ -45,13 +43,12 @@ type LinuxDevice interface {
4543

4644
// Device struct
4745
type Device struct {
48-
Mapper string `json:"mapper,omitempty"`
49-
WWID string `json:"wwid,omitempty"`
50-
WWN string `json:"wwn,omitempty"`
51-
Slaves []string `json:"slaves,omitempty"`
46+
Mapper string `json:"mapper,omitempty"`
47+
WWN string `json:"wwn,omitempty"`
48+
Slaves int `json:"slaves,omitempty"`
5249
}
5350

54-
// NewLinuxDevice: new device with given wwn
51+
// NewLinuxDevice returns a new device with the given wwn.
5552
func NewLinuxDevice(wwn string) LinuxDevice {
5653
return &Device{
5754
WWN: wwn,
@@ -62,7 +59,7 @@ func (d *Device) GetMapper() string {
6259
return d.Mapper
6360
}
6461

65-
// Populate: get all linux Devices
62+
// Populate get all linux Devices
6663
func (d *Device) Populate(needActivePath bool) error {
6764
args := []string{"ls", "--target", "multipath"}
6865
outBytes, err := exec.Command(dmsetupcommand, args...).CombinedOutput()
@@ -86,48 +83,51 @@ func (d *Device) Populate(needActivePath bool) error {
8683
klog.Warning(err)
8784
continue
8885
}
89-
tmpWWID := strings.TrimPrefix(uuid, "mpath-")
90-
tmpWWN := tmpWWID[1:] // truncate scsi-id prefix
91-
92-
if !strings.EqualFold(d.WWN, tmpWWN) {
93-
continue
94-
}
95-
96-
d.WWID = tmpWWID
9786
mapName, err := getMpathName(tmpPathname)
9887
if err != nil {
9988
return err
10089
}
101-
d.Mapper = "/dev/mapper/" + mapName
10290

103-
multipathdShowPaths, err := retryGetPathOfDevice(d, needActivePath)
91+
tmpWWID := strings.TrimPrefix(uuid, "mpath-")
92+
tmpWWN := tmpWWID[1:] // truncate scsi-id prefix
93+
94+
// get the active count; if 0 then cleanup the dm
95+
slavesCount, err := getPathsCount(mapName)
10496
if err != nil {
105-
err = fmt.Errorf("unable to get scsi slaves for device %s: %v", d.WWN, err)
106-
return err
97+
return fmt.Errorf("unable to count slaves for device %s: %v", d.WWN, err)
10798
}
108-
for _, path := range multipathdShowPaths {
109-
if !needActivePath || path.ChkState == "ready" {
110-
d.Slaves = append(d.Slaves, path.Device)
111-
}
99+
100+
if slavesCount == 0 {
101+
klog.Warningf("cleaning mapper %s as no active disks present", mapName)
102+
_ = multipathRemoveDmDevice(mapName)
103+
// even if the wwn matches, skip this mapper because it has no slaves
104+
} else if strings.EqualFold(d.WWN, tmpWWN) {
105+
// if at least 1 active slave is present then use it
106+
d.Mapper = "/dev/mapper/" + mapName
107+
d.Slaves = slavesCount
108+
break
112109
}
113110
}
114111

115112
return nil
116113
}
117114

118-
// DeleteDevice: delete the multipath device
115+
// DeleteDevice deletes the multipath device.
119116
func (d *Device) DeleteDevice() (err error) {
120-
if err = tearDownMultipathDevice(d); err != nil {
117+
if err := retryCleanupDevice(d); err != nil {
118+
klog.Warningf("error while deleting multipath device %s: %v", d.Mapper, err)
121119
return err
122120
}
121+
d.Mapper = ""
122+
d.Slaves = 0
123123
return nil
124124
}
125125

126-
// CreateDevice: attach and create linux devices to host
126+
// CreateDevice attaches and creates linux devices on the host.
127127
func (d *Device) CreateDevice() (err error) {
128128

129129
if err = d.createLinuxDevice(); err != nil {
130-
klog.Errorf("unable to create device for wwn %v", d.WWN)
130+
klog.Errorf("unable to create device for wwn %s", d.WWN)
131131
return err
132132
}
133133

@@ -145,14 +145,18 @@ func (d *Device) CreateDevice() (err error) {
145145
func scsiHostRescanWithLock() (err error) {
146146
start := time.Now()
147147
var scan bool = true
148-
defer scanLock.Unlock()
149148

150149
for {
151150
if scanLock.TryLock() {
152-
if scan {
153-
err = scsiHostRescan()
154-
}
155-
return err
151+
func() {
152+
defer scanLock.Unlock()
153+
if scan {
154+
// always clean orphan paths before scanning hosts
155+
cleanupOrphanPaths()
156+
err = scsiHostRescan()
157+
}
158+
}()
159+
return
156160
} else {
157161
if time.Since(start) > time.Minute {
158162
// Scanning usually takes < 30s. If wait is more than a min then return.
@@ -188,30 +192,11 @@ func (d *Device) createLinuxDevice() (err error) {
188192
if err != nil {
189193
return err
190194
}
191-
if len(d.Slaves) > 0 {
195+
if d.Slaves > 0 {
192196
// populated device with at least 1 slave; job done
193197
return nil
194198
}
195199

196-
// cleaning up faulty, orphan, stale paths/maps
197-
// try only between 10 secs
198-
tmpTime := time.Now().Add(-10 * time.Second)
199-
if lastCleanExecuted.Before(tmpTime) {
200-
tryCleaningFaultyAndOrphan()
201-
lastCleanExecuted = time.Now()
202-
}
203-
204-
// handle stale paths
205-
// heavy operation hence try only between 25 secs
206-
tmpTime = time.Now().Add(-25 * time.Second)
207-
if lastStaleCleanExecuted.Before(tmpTime) {
208-
err = cleanupStalePaths()
209-
if err != nil {
210-
klog.Warning(err)
211-
}
212-
lastStaleCleanExecuted = time.Now()
213-
}
214-
215200
// some resting time
216201
time.Sleep(time.Second * 5)
217202
}
@@ -220,24 +205,6 @@ func (d *Device) createLinuxDevice() (err error) {
220205
return fmt.Errorf("fc device not found for wwn %s", d.WWN)
221206
}
222207

223-
// tryCleaningFaultyAndOrphan: house keeping when device cannot be found once
224-
func tryCleaningFaultyAndOrphan() {
225-
operations := []struct {
226-
cleanupFunc func() error
227-
description string
228-
}{
229-
{cleanupFunc: cleanupFaultyPaths, description: "faulty paths"},
230-
{cleanupFunc: cleanupOrphanPaths, description: "orphan paths"},
231-
{cleanupFunc: cleanupStaleMaps, description: "stale maps"},
232-
{cleanupFunc: cleanupErrorMultipathMaps, description: "error mappers"},
233-
}
234-
for _, op := range operations {
235-
if err := op.cleanupFunc(); err != nil {
236-
klog.Warningf("Failed to cleanup %s: %v", op.description, err)
237-
}
238-
}
239-
}
240-
241208
// scsiHostRescan: scans all scsi hosts
242209
func scsiHostRescan() error {
243210
scsiPath := "/sys/class/scsi_host/"

0 commit comments

Comments
 (0)