From 88957e7f86c29dd98c480f92e9673183a67aedb6 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Mon, 13 Jun 2022 18:31:14 +0200 Subject: [PATCH] WIP: pkg/agent: wait for all volumes to be detached before rebooting This commit provides a PoC version of implementing agent waiting for all volumes attached to the node to be detached as a step after draining the node, as shutting down the Pod does not mean the volume has been detached, as usually CSI agent will be running as a DaemonSet on the node and will take care of detaching the volume from the node when the pod shuts down. This commit improves rebooting experience, as right now if there is not enough time for CSI agent to detach the volumes from the node, node gets rebooted and pods using attached volumes have no way to be attached to other nodes, which effectively increases the downtime caused for stateful workloads. This commit still requires tests and a better interface for the users. If someone wants to try this feature on their own cluster, I've published the following image I've been testing with: quay.io/invidian/flatcar-linux-update-operator:97c0dee50c807dbba7d2debc59b369f84002797e Closes #30 Signed-off-by: Mateusz Gozdek --- examples/deploy/rbac/cluster-role.yaml | 6 ++++++ pkg/agent/agent.go | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/examples/deploy/rbac/cluster-role.yaml b/examples/deploy/rbac/cluster-role.yaml index f29d39e58..3c611b16b 100644 --- a/examples/deploy/rbac/cluster-role.yaml +++ b/examples/deploy/rbac/cluster-role.yaml @@ -47,3 +47,9 @@ rules: - daemonsets verbs: - get + - apiGroups: + - storage.k8s.io + resources: + - volumeattachments + verbs: + - list diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 2ca395ed1..6b807da87 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -290,6 +290,30 @@ func (k *klocksmith) process(ctx context.Context) error { klog.Info("Node drained, rebooting") + for { + attachments, err := 
k.clientset.StorageV1().VolumeAttachments().List(ctx, metav1.ListOptions{}) + if err != nil { + klog.Errorf("Listing volume attachments: %v", err) + continue + } + + anyVolumeAttached := false + + for _, attachment := range attachments.Items { + if attachment.Status.Attached && attachment.Spec.NodeName == k.nodeName { + anyVolumeAttached = true + klog.Infof("Volume %q is still attached, waiting for detach", attachment.Name) + } + } + + if !anyVolumeAttached { + klog.Info("All volumes are detached from node, rebooting.") + break + } + + time.Sleep(5 * time.Second) + } + // Reboot. k.lc.Reboot(false)