@@ -257,6 +257,14 @@ const (
 	filterNodeLimit = 150
 )
 
+const (
+	// represents expected attachment status of a volume after attach
+	volumeAttachedStatus = "attached"
+
+	// represents expected attachment status of a volume after detach
+	volumeDetachedStatus = "detached"
+)
+
 // awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
 // The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
 var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master")
@@ -1967,7 +1975,6 @@ func (c *Cloud) getMountDevice(
 		// AWS API returns consistent result next time (i.e. the volume is detached).
 		status := volumeStatus[mappingVolumeID]
 		klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
-		return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
 	}
 	return mountDevice, true, nil
 }
@@ -2168,7 +2175,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
 
 // waitForAttachmentStatus polls until the attachment status is the expected value
 // On success, it returns the last attachment state.
-func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) {
+func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) {
 	backoff := wait.Backoff{
 		Duration: volumeAttachmentStatusPollDelay,
 		Factor:   volumeAttachmentStatusFactor,
@@ -2193,7 +2200,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
 		if err != nil {
 			// The VolumeNotFound error is special -- we don't need to wait for it to repeat
 			if isAWSErrorVolumeNotFound(err) {
-				if status == "detached" {
+				if status == volumeDetachedStatus {
 					// The disk doesn't exist, assume it's detached, log warning and stop waiting
 					klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
 					stateStr := "detached"
@@ -2202,7 +2209,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
 					}
 					return true, nil
 				}
-				if status == "attached" {
+				if status == volumeAttachedStatus {
 					// The disk doesn't exist, complain, give up waiting and report error
 					klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
 					return false, err
@@ -2237,7 +2244,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
 			}
 		}
 		if attachmentStatus == "" {
-			attachmentStatus = "detached"
+			attachmentStatus = volumeDetachedStatus
 		}
 		if attachment != nil {
 			// AWS eventual consistency can go back in time.
@@ -2266,6 +2273,13 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
 			}
 		}
 
+		// If we expected the volume to be attached and it was reported as already attached via the DescribeInstances call,
+		// but DescribeVolumes tells us it is detached, we short-circuit this long wait loop and return an error
+		// so that AttachDisk can be retried without waiting for 20 minutes.
+		if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
+			return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
+		}
+
 		if attachmentStatus == status {
 			// Attachment is in requested state, finish waiting
 			return true, nil
@@ -2411,7 +2425,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
 		klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
 	}
 
-	attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device)
+	attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device, alreadyAttached)
 
 	if err != nil {
 		if err == wait.ErrWaitTimeout {
@@ -2489,7 +2503,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
 		return "", errors.New("no response from DetachVolume")
 	}
 
-	attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "")
+	attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "", false)
 	if err != nil {
 		return "", err
 	}
@@ -4797,7 +4811,7 @@ func setNodeDisk(
 }
 
 func getInitialAttachDetachDelay(status string) time.Duration {
-	if status == "detached" {
+	if status == volumeDetachedStatus {
 		return volumeDetachmentStatusInitialDelay
 	}
 	return volumeAttachmentStatusInitialDelay
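
For context, a minimal, self-contained Go sketch (not part of this commit; waitForStatus and describe are made-up names, and the real code uses wait.ExponentialBackoff with a roughly 20-minute budget) illustrating how the new alreadyAttached short-circuit lets the caller fail fast and retry instead of polling to timeout when the volume API keeps reporting "detached":

package main

import (
	"errors"
	"fmt"
	"time"
)

const (
	volumeAttachedStatus = "attached"
	volumeDetachedStatus = "detached"
)

// waitForStatus polls describe() until it reports want. When the caller
// skipped the attach call because the volume already looked attached
// (alreadyAttached), any contradictory status fails fast so the attach
// can be retried instead of waiting out the whole polling budget.
func waitForStatus(want string, alreadyAttached bool, describe func() string) (string, error) {
	deadline := time.Now().Add(2 * time.Second) // stand-in for the real ~20 minute backoff
	for time.Now().Before(deadline) {
		got := describe()
		if want == volumeAttachedStatus && alreadyAttached && got != want {
			return got, fmt.Errorf("expected volume to be attached but was %s", got)
		}
		if got == want {
			return got, nil
		}
		time.Sleep(100 * time.Millisecond)
	}
	return "", errors.New("timed out waiting for volume status")
}

func main() {
	// Simulate the inconsistent case: instance metadata said "attached",
	// but the volume API keeps reporting "detached".
	_, err := waitForStatus(volumeAttachedStatus, true, func() string { return volumeDetachedStatus })
	fmt.Println("short-circuited with:", err)
}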