Skip to content

Commit 26c417c

Browse files
committed
disk: fix Detaching disks treated as attached
We should reject to attach disk detaching from the requested node.
1 parent de34e14 commit 26c417c

File tree

2 files changed

+101
-56
lines changed

2 files changed

+101
-56
lines changed

pkg/disk/cloud.go

Lines changed: 97 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,51 @@ func (ad *DiskAttachDetach) findDevice(ctx context.Context, diskID, serial strin
162162
return device, nil
163163
}
164164

165+
type attachAction int
166+
167+
const (
168+
giveUp attachAction = iota
169+
alreadyAttached
170+
detachFirst
171+
forceAttach
172+
attachNormally
173+
)
174+
175+
func chooseAttachAction(disk *ecs.Disk, instanceID string) (attachAction, error) {
176+
if disk.MultiAttach == "Disabled" {
177+
switch disk.Status {
178+
case DiskStatusInuse:
179+
if disk.InstanceId == instanceID {
180+
return alreadyAttached, nil
181+
}
182+
return detachFirst, nil
183+
case DiskStatusAttaching:
184+
return giveUp, status.Errorf(codes.Aborted, "AttachDisk: Disk %s is attaching to %s", disk.DiskId, instanceID)
185+
case DiskStatusDetaching:
186+
if disk.InstanceId == instanceID {
187+
return giveUp, status.Errorf(codes.Aborted, "AttachDisk: Disk %s is detaching from %s", disk.DiskId, instanceID)
188+
}
189+
// disk is detaching from another instance
190+
return forceAttach, nil
191+
}
192+
} else {
193+
switch disk.Status {
194+
case DiskStatusInuse:
195+
if waitstatus.IsInstanceAttached(disk, instanceID) {
196+
return alreadyAttached, nil
197+
}
198+
case DiskStatusDetaching:
199+
a := disk.Attachments.Attachment
200+
if len(a) == 1 && a[0].InstanceId == instanceID {
201+
return giveUp, status.Errorf(codes.Aborted, "AttachDisk: Disk %s is detaching from %s", disk.DiskId, instanceID)
202+
}
203+
}
204+
// Not sure for status == "Attaching", maybe attaching to another node, but detaching from the requested node?
205+
}
206+
// Most likely status == "Available". But let AttachDisk OpenAPI to decide.
207+
return attachNormally, nil
208+
}
209+
165210
// Attach Alibaba Cloud disk.
166211
// Returns device path if fromNode, disk serial number otherwise.
167212
func (ad *DiskAttachDetach) attachDisk(ctx context.Context, diskID, nodeID string, fromNode bool) (string, error) {
@@ -207,35 +252,11 @@ func (ad *DiskAttachDetach) attachDisk(ctx context.Context, diskID, nodeID strin
207252
}
208253
}
209254

210-
tryForceAttach := false
211-
212-
// disk is attached, means disk_ad_controller env is true, disk must be created after 2020.06
213-
switch disk.Status {
214-
case DiskStatusInuse:
215-
if disk.InstanceId == nodeID {
216-
if !fromNode {
217-
klog.Infof("AttachDisk: Disk %s is already attached to Instance %s, skipping", diskID, disk.InstanceId)
218-
return disk.SerialNumber, nil
219-
}
220-
deviceName, err := GetVolumeDeviceName(diskID)
221-
if err == nil && deviceName != "" && IsFileExisting(deviceName) {
222-
klog.Infof("AttachDisk: Disk %s is already attached to self Instance %s, and device is: %s", diskID, disk.InstanceId, deviceName)
223-
return deviceName, nil
224-
} else if disk.SerialNumber != "" {
225-
// wait for pci attach ready
226-
time.Sleep(5 * time.Second)
227-
klog.Infof("AttachDisk: find disk dev after 5 seconds")
228-
deviceName, err := GetVolumeDeviceName(diskID)
229-
if err == nil && deviceName != "" && IsFileExisting(deviceName) {
230-
klog.Infof("AttachDisk: Disk %s is already attached to self Instance %s, and device is: %s", diskID, disk.InstanceId, deviceName)
231-
return deviceName, nil
232-
}
233-
err = fmt.Errorf("AttachDisk: disk device cannot be found in node, diskid: %s, deviceName: %s, err: %+v", diskID, deviceName, err)
234-
return "", err
235-
}
236-
klog.Warningf("AttachDisk: Disk (no serial) %s is already attached to instance %s, but device unknown, will be detached and try again", diskID, disk.InstanceId)
237-
}
238-
255+
action, err := chooseAttachAction(disk, nodeID)
256+
if err != nil {
257+
return "", err
258+
}
259+
if action == detachFirst || action == forceAttach {
239260
if GlobalConfigVar.DiskBdfEnable {
240261
if allowed, err := forceDetachAllowed(GlobalConfigVar.EcsClient, disk); err != nil {
241262
return "", status.Errorf(codes.Aborted, "forceDetachAllowed failed: %v", err)
@@ -247,31 +268,54 @@ func (ad *DiskAttachDetach) attachDisk(ctx context.Context, diskID, nodeID strin
247268
if !GlobalConfigVar.DetachBeforeAttach {
248269
return "", status.Errorf(codes.Aborted, "AttachDisk: Disk %s is already attached to instance %s, env DISK_FORCE_DETACHED is false reject force detach", diskID, disk.InstanceId)
249270
}
250-
if canForceAttach {
251-
tryForceAttach = true
252-
} else {
253-
klog.Infof("AttachDisk: Disk %s is already attached to instance %s, will be detached", diskID, disk.InstanceId)
254-
detachRequest := ecs.CreateDetachDiskRequest()
255-
detachRequest.InstanceId = disk.InstanceId
256-
detachRequest.DiskId = disk.DiskId
257-
for key, value := range GlobalConfigVar.RequestBaseInfo {
258-
detachRequest.AppendUserAgent(key, value)
259-
}
260-
_, err = ad.ecs.DetachDisk(detachRequest)
261-
if err != nil {
262-
klog.Errorf("AttachDisk: Can't Detach disk %s from instance %s: with error: %v", diskID, disk.InstanceId, err)
263-
return "", status.Errorf(codes.Aborted, "AttachDisk: Can't Detach disk %s from instance %s: with error: %v", diskID, disk.InstanceId, err)
264-
}
265-
klog.Infof("AttachDisk: Wait for disk %s to be detached", diskID)
266-
if err := ad.waitForDiskDetached(ctx, diskID, nodeID); err != nil {
267-
return "", err
271+
}
272+
if canForceAttach && action == detachFirst {
273+
action = forceAttach
274+
}
275+
if action == forceAttach && !canForceAttach {
276+
action = attachNormally // should fail, but try it anyway
277+
}
278+
279+
switch action {
280+
case alreadyAttached:
281+
if !fromNode {
282+
klog.Infof("AttachDisk: Disk %s is already attached to Instance %s, skipping", diskID, disk.InstanceId)
283+
return disk.SerialNumber, nil
284+
}
285+
deviceName, err := GetVolumeDeviceName(diskID)
286+
if err == nil && deviceName != "" && IsFileExisting(deviceName) {
287+
klog.Infof("AttachDisk: Disk %s is already attached to self Instance %s, and device is: %s", diskID, disk.InstanceId, deviceName)
288+
return deviceName, nil
289+
} else if disk.SerialNumber != "" {
290+
// wait for pci attach ready
291+
time.Sleep(5 * time.Second)
292+
klog.Infof("AttachDisk: find disk dev after 5 seconds")
293+
deviceName, err := GetVolumeDeviceName(diskID)
294+
if err == nil && deviceName != "" && IsFileExisting(deviceName) {
295+
klog.Infof("AttachDisk: Disk %s is already attached to self Instance %s, and device is: %s", diskID, disk.InstanceId, deviceName)
296+
return deviceName, nil
268297
}
298+
err = fmt.Errorf("AttachDisk: disk device cannot be found in node, diskid: %s, deviceName: %s, err: %+v", diskID, deviceName, err)
299+
return "", err
300+
}
301+
klog.Warningf("AttachDisk: Disk (no serial) %s is already attached to instance %s, but device unknown, will be detached and try again", diskID, disk.InstanceId)
302+
fallthrough
303+
case detachFirst:
304+
klog.Infof("AttachDisk: Disk %s is already attached to instance %s, will be detached", diskID, disk.InstanceId)
305+
detachRequest := ecs.CreateDetachDiskRequest()
306+
detachRequest.InstanceId = disk.InstanceId
307+
detachRequest.DiskId = disk.DiskId
308+
for key, value := range GlobalConfigVar.RequestBaseInfo {
309+
detachRequest.AppendUserAgent(key, value)
269310
}
270-
case DiskStatusAttaching:
271-
return "", status.Errorf(codes.Aborted, "AttachDisk: Disk %s is attaching %v", diskID, disk)
272-
case DiskStatusDetaching:
273-
if canForceAttach {
274-
tryForceAttach = true
311+
_, err = ad.ecs.DetachDisk(detachRequest)
312+
if err != nil {
313+
klog.Errorf("AttachDisk: Can't Detach disk %s from instance %s: with error: %v", diskID, disk.InstanceId, err)
314+
return "", status.Errorf(codes.Aborted, "AttachDisk: Can't Detach disk %s from instance %s: with error: %v", diskID, disk.InstanceId, err)
315+
}
316+
klog.Infof("AttachDisk: Wait for disk %s to be detached", diskID)
317+
if err := ad.waitForDiskDetached(ctx, diskID, nodeID); err != nil {
318+
return "", err
275319
}
276320
}
277321

@@ -287,7 +331,7 @@ func (ad *DiskAttachDetach) attachDisk(ctx context.Context, diskID, nodeID strin
287331
attachRequest := ecs.CreateAttachDiskRequest()
288332
attachRequest.InstanceId = nodeID
289333
attachRequest.DiskId = diskID
290-
if tryForceAttach {
334+
if action == forceAttach {
291335
attachRequest.Force = requests.NewBoolean(true)
292336
logger.V(1).Info("try force attach", "from", disk.InstanceId, "to", nodeID)
293337
}

pkg/disk/cloud_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -687,9 +687,10 @@ func TestAttachDisk(t *testing.T) {
687687
expectErr: true,
688688
},
689689
{
690-
name: "detaching from self",
691-
before: disk("Detaching", "i-testinstanceid"),
692-
after: disk("In_use", "i-testinstanceid"), // FIXME
690+
name: "detaching from self",
691+
before: disk("Detaching", "i-testinstanceid"),
692+
noAttach: true,
693+
expectErr: true,
693694
},
694695
{
695696
// This not supported by ECS, but desired by us to speed up failover. Hopes they will support it someday.

0 commit comments

Comments
 (0)