Skip to content

Commit dbf83c8

Browse files
committed
disk/ad_slot: return better GRPC error message
Expected new message shown in Kubernetes events: "failed to reserve node i-xxxx for attach: still waiting for other disk(s) to finish attach/detach". Note that I intentionally do not specify a gRPC status code, so that gRPC determines the code automatically based on the error type.
1 parent 515ad5d commit dbf83c8

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

pkg/disk/attachdetach_slot.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package disk
22

33
import (
44
"context"
5+
"errors"
56
"sync"
67
)
78

@@ -73,7 +74,7 @@ func (s serialAD_DetachSlot) Acquire(ctx context.Context) error {
7374
case s.slot <- struct{}{}:
7475
return nil
7576
case <-ctx.Done():
76-
return ctx.Err()
77+
return maybeWaitingAD(ctx.Err())
7778
}
7879
}
7980

@@ -85,7 +86,7 @@ func (s serialAD_AttachSlot) Acquire(ctx context.Context) error {
8586
case s.slot <- struct{}{}:
8687
return nil
8788
case <-ctx.Done():
88-
return ctx.Err()
89+
return maybeWaitingAD(ctx.Err())
8990
}
9091
}
9192

@@ -120,7 +121,7 @@ func (s maxConcurrentSlot) Acquire(ctx context.Context) error {
120121
case s.slots <- struct{}{}:
121122
return nil
122123
case <-ctx.Done():
123-
return ctx.Err()
124+
return maybeWaitingAD(ctx.Err())
124125
}
125126
}
126127

@@ -181,3 +182,20 @@ func NewSlots(detachConcurrency, attachConcurrency int) AttachDetachSlots {
181182
}
182183
return NewPerNodeSlots(makeSlot)
183184
}
185+
186+
// waitingAD is the error value that maybeWaitingAD substitutes for an error
// matching context.DeadlineExceeded. It carries no fields; its message is
// fixed (see Error) and it still matches context.DeadlineExceeded under
// errors.Is (see Is).
type waitingAD struct{}
187+
188+
// Error implements the error interface and returns a fixed message saying
// that other attach/detach operations are still in progress.
func (waitingAD) Error() string {
189+
return "still waiting for other disk(s) to finish attach/detach"
190+
}
191+
192+
// Is reports whether target is context.DeadlineExceeded. errors.Is consults
// this method, so errors.Is(err, context.DeadlineExceeded) keeps succeeding
// for a waitingAD even though it does not wrap the original context error.
// NOTE(review): per the commit message, this is presumably what lets gRPC
// derive the status code from the error type — confirm against grpc-go.
func (waitingAD) Is(target error) bool {
193+
return target == context.DeadlineExceeded
194+
}
195+
196+
// maybeWaitingAD returns waitingAD{} when err matches
// context.DeadlineExceeded under errors.Is, and returns err unchanged
// otherwise (a nil err stays nil). The original err value is not retained
// inside the returned waitingAD.
func maybeWaitingAD(err error) error {
197+
if errors.Is(err, context.DeadlineExceeded) {
198+
return waitingAD{}
199+
}
200+
return err
201+
}

pkg/disk/attachdetach_slot_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,15 @@ func TestSerialDetach_NoRace(t *testing.T) {
171171
t.Fatal("state not updated")
172172
}
173173
}
174+
175+
// TestWaitingADError acquires the detach slot for "node1" once, then tries to
// acquire it again under a 10ms timeout and checks that the resulting error
// matches both waitingAD{} and context.DeadlineExceeded via errors.Is.
func TestWaitingADError(t *testing.T) {
176+
s := NewSlots(1, 0).GetSlotFor("node1").Detach()
177+
ctx := context.Background()
178+
// First acquisition must succeed; it is never released in this test.
assert.NoError(t, s.Acquire(ctx))
179+
180+
// Second acquisition is expected to fail once the 10ms deadline passes.
// NOTE(review): assumes the slot created by NewSlots(1, 0) blocks a second
// Acquire while the first is held — confirm against NewSlots.
ctx, cancel := context.WithTimeout(ctx, 10*time.Millisecond)
181+
defer cancel()
182+
err := s.Acquire(ctx)
183+
// Matches by value equality with the zero-size waitingAD struct.
assert.ErrorIs(t, err, waitingAD{})
184+
// Matches through the custom waitingAD.Is method.
assert.ErrorIs(t, err, context.DeadlineExceeded)
185+
}

pkg/disk/cloud.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ func (ad *DiskAttachDetach) attachDisk(ctx context.Context, diskID, nodeID strin
181181

182182
slot := ad.slots.GetSlotFor(nodeID).Attach()
183183
if err := slot.Acquire(ctx); err != nil {
184-
return "", status.Errorf(codes.Aborted, "AttachDisk: get ad-slot for disk %s failed: %v", diskID, err)
184+
return "", fmt.Errorf("failed to reserve node %s for attach: %w", nodeID, err)
185185
}
186186
defer slot.Release()
187187

@@ -442,7 +442,7 @@ func (ad *DiskAttachDetach) detachDisk(ctx context.Context, ecsClient *ecs.Clien
442442
// NodeStageVolume/NodeUnstageVolume should be called by sequence
443443
slot := ad.slots.GetSlotFor(nodeID).Detach()
444444
if err := slot.Acquire(ctx); err != nil {
445-
return status.Errorf(codes.Aborted, "DetachDisk: get ad-slot for disk %s failed: %v", diskID, err)
445+
return fmt.Errorf("failed to reserve node %s for detach: %w", nodeID, err)
446446
}
447447
defer slot.Release()
448448

0 commit comments

Comments
 (0)