Skip to content

Commit d41be2d

Browse files
committed
[enhance] attach device with up to 5 retries; increase wait for /device/block from 0.5s to 5s
Signed-off-by: zhuangbowei.zbw <zhuangbowei.zbw@alibaba-inc.com>
1 parent 95d4fff commit d41be2d

File tree

2 files changed

+33
-3
lines changed

2 files changed

+33
-3
lines changed

pkg/snapshot/overlay.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ func (o *snapshotter) createMountPoint(ctx context.Context, kind snapshots.Kind,
613613
}
614614
log.G(ctx).Debugf("attachAndMountBlockDevice (obdID: %s, writeType: %s, fsType %s, targetPath: %s)",
615615
obdID, writeType, fsType, o.overlaybdTargetPath(obdID))
616-
if err = o.attachAndMountBlockDevice(ctx, obdID, writeType, fsType, parent == ""); err != nil {
616+
if err = o.attachWithRetry(ctx, obdID, writeType, fsType, parent == ""); err != nil {
617617
log.G(ctx).Errorf("%v", err)
618618
return nil, fmt.Errorf("failed to attach and mount for snapshot %v: %w", obdID, err)
619619
}
@@ -773,7 +773,7 @@ func (o *snapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount,
773773
fsType = o.defaultFsType
774774
}
775775
}
776-
if err := o.attachAndMountBlockDevice(ctx, parentID, RoDir, fsType, false); err != nil {
776+
if err := o.attachWithRetry(ctx, parentID, RoDir, fsType, false); err != nil {
777777
return nil, fmt.Errorf("failed to attach and mount for snapshot %v: %w", key, err)
778778
}
779779
return o.basedOnBlockDeviceMount(ctx, s, RoDir)

pkg/snapshot/storage.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"context"
2222
"encoding/binary"
2323
"encoding/json"
24+
"errors"
2425
"fmt"
2526
"io"
2627
"math"
@@ -50,7 +51,7 @@ import (
5051
)
5152

5253
const (
53-
maxAttachAttempts = 50
54+
maxAttachAttempts = 500
5455

5556
// hba number used to create tcmu devices in configfs
5657
// all overlaybd devices are configured in /sys/kernel/config/target/core/user_999999999/
@@ -478,6 +479,35 @@ func (o *snapshotter) attachAndMountBlockDevice(ctx context.Context, snID string
478479
return nil
479480
}
480481
}
482+
return attachRetryError{lastErr}
483+
}
484+
485+
// attachRetryError wraps the last error seen by attachAndMountBlockDevice when
// it gives up, so that attachWithRetry can tell (via errors.As) that the whole
// attach operation is worth retrying.
type attachRetryError struct {
	err error
}

// Error returns the message of the wrapped error. A wrapper built around a nil
// error yields a generic message instead of panicking on a nil dereference.
func (e attachRetryError) Error() string {
	if e.err == nil {
		return "attach failed"
	}
	return e.err.Error()
}

// Unwrap exposes the wrapped error so errors.Is and errors.As can inspect the
// underlying cause through this wrapper.
func (e attachRetryError) Unwrap() error {
	return e.err
}
492+
493+
func (o *snapshotter) attachWithRetry(ctx context.Context, snID string, writable string, fsType string, mkfs bool) error {
494+
maxRetry := 5
495+
var lastErr error
496+
for retry := 0; retry < maxRetry; retry++ {
497+
lastErr = o.attachAndMountBlockDevice(ctx, snID, writable, fsType, mkfs)
498+
if errors.As(lastErr, &attachRetryError{}) {
499+
log.G(ctx).Warnf("attach failed, retrying(%d/%d)... snID: %s, err: %v", retry+1, maxRetry, snID, lastErr)
500+
time.Sleep(1 * time.Second) // Wait for 1 second before retrying
501+
continue
502+
} else if lastErr != nil {
503+
log.G(ctx).Errorf("attach failed, snID: %s, err: %v", snID, lastErr)
504+
return lastErr
505+
} else {
506+
log.G(ctx).Infof("attach success, snID: %s", snID)
507+
return nil
508+
}
509+
}
510+
log.G(ctx).Errorf("attach failed, max retry reached")
481511
return lastErr
482512
}
483513

0 commit comments

Comments
 (0)