Skip to content

Commit 4c0c97d

Browse files
committed
Fix attachment of just detached AWS volumes
The AWS API has an eventual consistency model and can return stale data. For example, DescribeInstances can report that a volume is still being detached even though the detachment has already been confirmed. AttachDisk() should return an error in this case, allowing the A/D controller to retry, rather than assuming that the volume is attached.
1 parent 24424e2 commit 4c0c97d

File tree

1 file changed

+16
-3
lines changed
  • staging/src/k8s.io/legacy-cloud-providers/aws

1 file changed

+16
-3
lines changed

staging/src/k8s.io/legacy-cloud-providers/aws/aws.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import (
4747
"github.com/aws/aws-sdk-go/service/kms"
4848
"github.com/aws/aws-sdk-go/service/sts"
4949
"gopkg.in/gcfg.v1"
50-
"k8s.io/api/core/v1"
50+
v1 "k8s.io/api/core/v1"
5151
"k8s.io/klog"
5252

5353
"k8s.io/apimachinery/pkg/api/resource"
@@ -63,7 +63,7 @@ import (
6363
"k8s.io/client-go/pkg/version"
6464
"k8s.io/client-go/tools/cache"
6565
"k8s.io/client-go/tools/record"
66-
"k8s.io/cloud-provider"
66+
cloudprovider "k8s.io/cloud-provider"
6767
nodehelpers "k8s.io/cloud-provider/node/helpers"
6868
servicehelpers "k8s.io/cloud-provider/service/helpers"
6969
cloudvolume "k8s.io/cloud-provider/volume"
@@ -1861,6 +1861,7 @@ func (c *Cloud) getMountDevice(
18611861
assign bool) (assigned mountDevice, alreadyAttached bool, err error) {
18621862

18631863
deviceMappings := map[mountDevice]EBSVolumeID{}
1864+
volumeStatus := map[EBSVolumeID]string{} // for better logging of volume status
18641865
for _, blockDevice := range info.BlockDeviceMappings {
18651866
name := aws.StringValue(blockDevice.DeviceName)
18661867
if strings.HasPrefix(name, "/dev/sd") {
@@ -1872,6 +1873,10 @@ func (c *Cloud) getMountDevice(
18721873
if len(name) < 1 || len(name) > 2 {
18731874
klog.Warningf("Unexpected EBS DeviceName: %q", aws.StringValue(blockDevice.DeviceName))
18741875
}
1876+
if blockDevice.Ebs != nil && blockDevice.Ebs.VolumeId != nil {
1877+
volumeStatus[EBSVolumeID(*blockDevice.Ebs.VolumeId)] = aws.StringValue(blockDevice.Ebs.Status)
1878+
}
1879+
18751880
deviceMappings[mountDevice(name)] = EBSVolumeID(aws.StringValue(blockDevice.Ebs.VolumeId))
18761881
}
18771882

@@ -1889,7 +1894,15 @@ func (c *Cloud) getMountDevice(
18891894
for mountDevice, mappingVolumeID := range deviceMappings {
18901895
if volumeID == mappingVolumeID {
18911896
if assign {
1892-
klog.Warningf("Got assignment call for already-assigned volume: %s@%s", mountDevice, mappingVolumeID)
1897+
// DescribeInstances shows the volume as attached / detaching, while Kubernetes
1898+
// cloud provider thinks it's detached.
1899+
// This can happen when the volume has just been detached from the same node
1900+
// and AWS API returns stale data in this DescribeInstances ("eventual consistency").
1901+
// Fail the attachment and let A/D controller retry in a while, hoping that
1902+
// the AWS API returns a consistent result next time (i.e. the volume is detached).
1903+
status := volumeStatus[mappingVolumeID]
1904+
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
1905+
return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
18931906
}
18941907
return mountDevice, true, nil
18951908
}

0 commit comments

Comments
 (0)