@@ -257,6 +257,14 @@ const (
257
257
filterNodeLimit = 150
258
258
)
259
259
260
+ const (
261
+ // volumeAttachedStatus is the attachment status expected for a volume once an attach has completed
262
+ volumeAttachedStatus = "attached"
263
+
264
+ // volumeDetachedStatus is the attachment status expected for a volume once a detach has completed
265
+ volumeDetachedStatus = "detached"
266
+ )
267
+
260
268
// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
261
269
// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
262
270
var awsTagNameMasterRoles = sets .NewString ("kubernetes.io/role/master" , "k8s.io/role/master" )
@@ -1943,7 +1951,6 @@ func (c *Cloud) getMountDevice(
1943
1951
// AWS API returns consistent result next time (i.e. the volume is detached).
1944
1952
status := volumeStatus [mappingVolumeID ]
1945
1953
klog .Warningf ("Got assignment call for already-assigned volume: %s@%s, volume status: %s" , mountDevice , mappingVolumeID , status )
1946
- return mountDevice , false , fmt .Errorf ("volume is still being detached from the node" )
1947
1954
}
1948
1955
return mountDevice , true , nil
1949
1956
}
@@ -2144,7 +2151,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
2144
2151
2145
2152
// waitForAttachmentStatus polls until the attachment status is the expected value
2146
2153
// On success, it returns the last attachment state.
2147
- func (d * awsDisk ) waitForAttachmentStatus (status string , expectedInstance , expectedDevice string ) (* ec2.VolumeAttachment , error ) {
2154
+ func (d * awsDisk ) waitForAttachmentStatus (status string , expectedInstance , expectedDevice string , alreadyAttached bool ) (* ec2.VolumeAttachment , error ) {
2148
2155
backoff := wait.Backoff {
2149
2156
Duration : volumeAttachmentStatusPollDelay ,
2150
2157
Factor : volumeAttachmentStatusFactor ,
@@ -2169,7 +2176,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
2169
2176
if err != nil {
2170
2177
// The VolumeNotFound error is special -- we don't need to wait for it to repeat
2171
2178
if isAWSErrorVolumeNotFound (err ) {
2172
- if status == "detached" {
2179
+ if status == volumeDetachedStatus {
2173
2180
// The disk doesn't exist, assume it's detached, log warning and stop waiting
2174
2181
klog .Warningf ("Waiting for volume %q to be detached but the volume does not exist" , d .awsID )
2175
2182
stateStr := "detached"
@@ -2178,7 +2185,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
2178
2185
}
2179
2186
return true , nil
2180
2187
}
2181
- if status == "attached" {
2188
+ if status == volumeAttachedStatus {
2182
2189
// The disk doesn't exist, complain, give up waiting and report error
2183
2190
klog .Warningf ("Waiting for volume %q to be attached but the volume does not exist" , d .awsID )
2184
2191
return false , err
@@ -2213,7 +2220,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
2213
2220
}
2214
2221
}
2215
2222
if attachmentStatus == "" {
2216
- attachmentStatus = "detached"
2223
+ attachmentStatus = volumeDetachedStatus
2217
2224
}
2218
2225
if attachment != nil {
2219
2226
// AWS eventual consistency can go back in time.
@@ -2242,6 +2249,13 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
2242
2249
}
2243
2250
}
2244
2251
2252
+ // If we expected the volume to be attached and the DescribeInstance call reported it as already attached,
2253
+ // but DescribeVolume reports a different attachment status, short-circuit this long wait loop and return an error
2254
+ // so that AttachDisk can be retried without waiting for 20 minutes.
2255
+ if (status == volumeAttachedStatus ) && alreadyAttached && (attachmentStatus != status ) {
2256
+ return false , fmt .Errorf ("attachment of disk %q failed, expected device to be attached but was %s" , d .name , attachmentStatus )
2257
+ }
2258
+
2245
2259
if attachmentStatus == status {
2246
2260
// Attachment is in requested state, finish waiting
2247
2261
return true , nil
@@ -2387,7 +2401,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
2387
2401
klog .V (2 ).Infof ("AttachVolume volume=%q instance=%q request returned %v" , disk .awsID , awsInstance .awsID , attachResponse )
2388
2402
}
2389
2403
2390
- attachment , err := disk .waitForAttachmentStatus ("attached" , awsInstance .awsID , ec2Device )
2404
+ attachment , err := disk .waitForAttachmentStatus ("attached" , awsInstance .awsID , ec2Device , alreadyAttached )
2391
2405
2392
2406
if err != nil {
2393
2407
if err == wait .ErrWaitTimeout {
@@ -2465,7 +2479,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
2465
2479
return "" , errors .New ("no response from DetachVolume" )
2466
2480
}
2467
2481
2468
- attachment , err := diskInfo .disk .waitForAttachmentStatus ("detached" , awsInstance .awsID , "" )
2482
+ attachment , err := diskInfo .disk .waitForAttachmentStatus ("detached" , awsInstance .awsID , "" , false )
2469
2483
if err != nil {
2470
2484
return "" , err
2471
2485
}
@@ -4773,7 +4787,7 @@ func setNodeDisk(
4773
4787
}
4774
4788
4775
4789
func getInitialAttachDetachDelay (status string ) time.Duration {
4776
- if status == "detached" {
4790
+ if status == volumeDetachedStatus {
4777
4791
return volumeDetachmentStatusInitialDelay
4778
4792
}
4779
4793
return volumeAttachmentStatusInitialDelay
0 commit comments