Skip to content

Commit 15c3f1b

Browse files
authored
Merge pull request kubernetes#87258 from verult/slow-rxm-attach
Parallelize attach operations across different nodes for volumes that allow multi-attach
2 parents 52d7614 + c6a03fa commit 15c3f1b

File tree

9 files changed

+725
-257
lines changed

9 files changed

+725
-257
lines changed

pkg/controller/volume/attachdetach/reconciler/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ go_library(
1616
"//pkg/controller/volume/attachdetach/statusupdater:go_default_library",
1717
"//pkg/kubelet/events:go_default_library",
1818
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
19-
"//pkg/volume:go_default_library",
19+
"//pkg/volume/util:go_default_library",
2020
"//pkg/volume/util/operationexecutor:go_default_library",
2121
"//staging/src/k8s.io/api/core/v1:go_default_library",
2222
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",

pkg/controller/volume/attachdetach/reconciler/reconciler.go

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
"strings"
2525
"time"
2626

27-
"k8s.io/api/core/v1"
27+
v1 "k8s.io/api/core/v1"
2828
"k8s.io/apimachinery/pkg/types"
2929
"k8s.io/apimachinery/pkg/util/wait"
3030
"k8s.io/client-go/tools/record"
@@ -34,7 +34,7 @@ import (
3434
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
3535
kevents "k8s.io/kubernetes/pkg/kubelet/events"
3636
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
37-
"k8s.io/kubernetes/pkg/volume"
37+
"k8s.io/kubernetes/pkg/volume/util"
3838
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
3939
)
4040

@@ -134,42 +134,6 @@ func (rc *reconciler) syncStates() {
134134
rc.attacherDetacher.VerifyVolumesAreAttached(volumesPerNode, rc.actualStateOfWorld)
135135
}
136136

137-
// isMultiAttachForbidden checks if attaching this volume to multiple nodes is definitely not allowed/possible.
138-
// In its current form, this function can only reliably say for which volumes it's definitely forbidden. If it returns
139-
// false, it is not guaranteed that multi-attach is actually supported by the volume type and we must rely on the
140-
// attacher to fail fast in such cases.
141-
// Please see https://github.com/kubernetes/kubernetes/issues/40669 and https://github.com/kubernetes/kubernetes/pull/40148#discussion_r98055047
142-
func (rc *reconciler) isMultiAttachForbidden(volumeSpec *volume.Spec) bool {
143-
if volumeSpec.Volume != nil {
144-
// Check for volume types which are known to fail slow or cause trouble when trying to multi-attach
145-
if volumeSpec.Volume.AzureDisk != nil ||
146-
volumeSpec.Volume.Cinder != nil {
147-
return true
148-
}
149-
}
150-
151-
// Only if this volume is a persistent volume, we have reliable information on whether it's allowed or not to
152-
// multi-attach. We trust in the individual volume implementations to not allow unsupported access modes
153-
if volumeSpec.PersistentVolume != nil {
154-
// Check for persistent volume types which do not fail when trying to multi-attach
155-
if len(volumeSpec.PersistentVolume.Spec.AccessModes) == 0 {
156-
// No access mode specified so we don't know for sure. Let the attacher fail if needed
157-
return false
158-
}
159-
160-
// check if this volume is allowed to be attached to multiple PODs/nodes, if yes, return false
161-
for _, accessMode := range volumeSpec.PersistentVolume.Spec.AccessModes {
162-
if accessMode == v1.ReadWriteMany || accessMode == v1.ReadOnlyMany {
163-
return false
164-
}
165-
}
166-
return true
167-
}
168-
169-
// we don't know if it's supported or not and let the attacher fail later in cases it's not supported
170-
return false
171-
}
172-
173137
func (rc *reconciler) reconcile() {
174138
// Detaches are triggered before attaches so that volumes referenced by
175139
// pods that are rescheduled to a different node are detached first.
@@ -182,9 +146,16 @@ func (rc *reconciler) reconcile() {
182146
// This check must be done before we do any other checks, as otherwise the other checks
183147
// may pass while at the same time the volume leaves the pending state, resulting in
184148
// double detach attempts
185-
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "") {
186-
klog.V(10).Infof("Operation for volume %q is already running. Can't start detach for %q", attachedVolume.VolumeName, attachedVolume.NodeName)
187-
continue
149+
if util.IsMultiAttachForbidden(attachedVolume.VolumeSpec) {
150+
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "" /* podName */, "" /* nodeName */) {
151+
klog.V(10).Infof("Operation for volume %q is already running in the cluster. Can't start detach for %q", attachedVolume.VolumeName, attachedVolume.NodeName)
152+
continue
153+
}
154+
} else {
155+
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "" /* podName */, attachedVolume.NodeName) {
156+
klog.V(10).Infof("Operation for volume %q is already running for node %q. Can't start detach", attachedVolume.VolumeName, attachedVolume.NodeName)
157+
continue
158+
}
188159
}
189160

190161
// Set the detach request time
@@ -260,15 +231,17 @@ func (rc *reconciler) attachDesiredVolumes() {
260231
rc.actualStateOfWorld.ResetDetachRequestTime(volumeToAttach.VolumeName, volumeToAttach.NodeName)
261232
continue
262233
}
263-
// Don't even try to start an operation if there is already one running
264-
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "") {
265-
if klog.V(10) {
266-
klog.Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName)
234+
235+
if util.IsMultiAttachForbidden(volumeToAttach.VolumeSpec) {
236+
237+
// Don't even try to start an operation if there is already one running for the given volume
238+
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "" /* podName */, "" /* nodeName */) {
239+
if klog.V(10) {
240+
klog.Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName)
241+
}
242+
continue
267243
}
268-
continue
269-
}
270244

271-
if rc.isMultiAttachForbidden(volumeToAttach.VolumeSpec) {
272245
nodes := rc.actualStateOfWorld.GetNodesForAttachedVolume(volumeToAttach.VolumeName)
273246
if len(nodes) > 0 {
274247
if !volumeToAttach.MultiAttachErrorReported {
@@ -277,6 +250,17 @@ func (rc *reconciler) attachDesiredVolumes() {
277250
}
278251
continue
279252
}
253+
254+
} else {
255+
256+
// Don't even try to start an operation if there is already one running for the given volume and node.
257+
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "" /* podName */, volumeToAttach.NodeName) {
258+
if klog.V(10) {
259+
klog.Infof("Operation for volume %q is already running for node %q. Can't start attach", volumeToAttach.VolumeName, volumeToAttach.NodeName)
260+
}
261+
continue
262+
}
263+
280264
}
281265

282266
// Volume/Node doesn't exist, spawn a goroutine to attach it

pkg/kubelet/volumemanager/reconciler/reconciler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ func (rc *reconciler) unmountDetachDevices() {
294294
for _, attachedVolume := range rc.actualStateOfWorld.GetUnmountedVolumes() {
295295
// Check IsOperationPending to avoid marking a volume as detached if it's in the process of mounting.
296296
if !rc.desiredStateOfWorld.VolumeExists(attachedVolume.VolumeName) &&
297-
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName) {
297+
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
298298
if attachedVolume.DeviceMayBeMounted() {
299299
// Volume is globally mounted to device, unmount it
300300
klog.V(5).Infof(attachedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountDevice", ""))
@@ -422,7 +422,7 @@ func (rc *reconciler) syncStates() {
422422
continue
423423
}
424424
// There is no pod that uses the volume.
425-
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName) {
425+
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
426426
klog.Warning("Volume is in pending operation, skip cleaning up mounts")
427427
}
428428
klog.V(2).Infof(

pkg/volume/util/nestedpendingoperations/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ go_library(
1414
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
1515
"//pkg/volume/util/types:go_default_library",
1616
"//staging/src/k8s.io/api/core/v1:go_default_library",
17+
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
1718
"//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
1819
"//vendor/k8s.io/klog:go_default_library",
1920
],
@@ -27,6 +28,7 @@ go_test(
2728
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
2829
"//pkg/volume/util/types:go_default_library",
2930
"//staging/src/k8s.io/api/core/v1:go_default_library",
31+
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
3032
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
3133
],
3234
)

0 commit comments

Comments
 (0)