@@ -21,7 +21,9 @@ import (
2121 "sync/atomic"
2222 "time"
2323
24+ "github.com/cenkalti/backoff/v4"
2425 corev1 "k8s.io/api/core/v1"
26+ "k8s.io/apimachinery/pkg/api/errors"
2527 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2628 "k8s.io/client-go/rest"
2729
@@ -31,7 +33,10 @@ import (
3133
3234const defaultZTunnelKeepAliveCheckInterval = 5 * time .Second // presumably the default period between ztunnel keepalive checks; its use is not visible in this hunk — TODO confirm
3335
34- var log = scopes .CNIAgent
36+ var (
37+ log = scopes .CNIAgent
38+ tokenWaitBackoff = time .Second // constant delay between retries of the parent DaemonSet lookup in ShouldStopCleanup
39+ )
3540
3641type MeshDataplane interface {
3742 // MUST be called first, (even before Start()).
@@ -119,18 +124,47 @@ func (s *Server) Stop(skipCleanup bool) {
119124 s .dataplane .Stop (skipCleanup )
120125}
121126
122- func (s * Server ) ShouldStopForUpgrade (selfName , selfNamespace string ) bool {
127+ // ShouldStopCleanup reports whether cleanup of the istio-cni config and binary should be skipped at shutdown: true when the parent DaemonSet "<selfName>-node" in selfNamespace still exists without a deletion timestamp (an upgrade or a node reboot), false when it is not found, is marked for deletion, or the lookup is unauthorized. If the lookup keeps failing for any other reason, the result falls back to the value of istioOwnedCNIConfig.
128+ func (s * Server ) ShouldStopCleanup (selfName , selfNamespace string , istioOwnedCNIConfig bool ) bool {
123129 dsName := fmt .Sprintf ("%s-node" , selfName )
124- cniDS , err := s .kubeClient .Kube ().AppsV1 ().DaemonSets (selfNamespace ).Get (context .Background (), dsName , metav1.GetOptions {})
125- log .Debugf ("Daemonset %s has deletion timestamp?: %+v" , dsName , cniDS .DeletionTimestamp )
126- if err == nil && cniDS != nil && cniDS .DeletionTimestamp == nil {
127- log .Infof ("terminating, but parent DS %s is still present, this is an upgrade, leaving plugin in place" , dsName )
128- return true
130+ shouldStopCleanup := false
131+ var numRetries uint64
132+ // Fallback result and retry budget for when the lookup never gets a definitive answer: with an istio-owned CNI config file, default to leaving the plugin in place (true) and allow 2 retries; otherwise default to cleaning up (false) with numRetries left at 0 (expected to mean a single attempt under cenkalti/backoff WithMaxRetries semantics)
133+ if istioOwnedCNIConfig {
134+ shouldStopCleanup = true
135+ numRetries = 2
129136 }
130-
131- // If the DS is gone, it's definitely not an upgrade, so carry on like normal.
132- log .Infof ("parent DS %s is gone or marked for deletion, this is not an upgrade, shutting down normally %s" , dsName , err )
133- return false
137+ err := backoff .Retry (
138+ func () error {
139+ cniDS , err := s .kubeClient .Kube ().AppsV1 ().DaemonSets (selfNamespace ).Get (context .Background (), dsName , metav1.GetOptions {})
140+
141+ if err == nil && cniDS != nil && cniDS .DeletionTimestamp == nil {
142+ log .Infof ("terminating, but parent DaemonSet %s is still present, this is an upgrade or a node reboot, leaving plugin in place" , dsName )
143+ shouldStopCleanup = true
144+ return nil
145+ }
146+ if errors .IsNotFound (err ) || (cniDS != nil && cniDS .DeletionTimestamp != nil ) {
147+ // If the DS is gone, or marked for deletion, this is not an upgrade.
148+ // We can safely shut down the plugin.
149+ log .Infof ("parent DaemonSet %s is not found or marked for deletion, this is not an upgrade, shutting down normally" , dsName )
150+ shouldStopCleanup = false
151+ return nil
152+ }
153+ if errors .IsUnauthorized (err ) { // NOTE(review): matches only 401 Unauthorized; an RBAC denial (403 Forbidden) falls through to the retry path below — confirm that is intended
154+ log .Infof ("permission to get parent DaemonSet %s has been revoked manually or due to uninstall, this is not an upgrade, " +
155+ "shutting down normally" , dsName )
156+ shouldStopCleanup = false
157+ return nil
158+ }
159+ log .Infof ("failed to get parent DS %s, retrying: %v" , dsName , err )
160+ return err
161+ },
162+ // Retries are capped at numRetries (2 for an istio-owned config, i.e. at most 3 attempts; 0 otherwise) so other shutdown tasks can complete before the graceful shutdown period ends
163+ backoff .WithMaxRetries (backoff .NewConstantBackOff (tokenWaitBackoff ), numRetries ))
164+ if err != nil {
165+ log .Infof ("failed to get parent DaemonSet %s, returning %t: %v" , dsName , shouldStopCleanup , err )
166+ }
167+ return shouldStopCleanup
134168}
135169
136170// buildKubeClient creates the kube client
0 commit comments