
Commit c9bbd85

generalize lease controller
1 parent c70e364 commit c9bbd85

4 files changed: 159 additions, 57 deletions


pkg/kubelet/kubelet.go

Lines changed: 16 additions & 3 deletions
@@ -55,6 +55,7 @@ import (
 	"k8s.io/client-go/util/certificate"
 	"k8s.io/client-go/util/flowcontrol"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/component-helpers/lease"
 	internalapi "k8s.io/cri-api/pkg/apis"
 	"k8s.io/klog/v2"
 	pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
@@ -83,7 +84,6 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/metrics/collectors"
 	"k8s.io/kubernetes/pkg/kubelet/network/dns"
-	"k8s.io/kubernetes/pkg/kubelet/nodelease"
 	oomwatcher "k8s.io/kubernetes/pkg/kubelet/oom"
 	"k8s.io/kubernetes/pkg/kubelet/pleg"
 	"k8s.io/kubernetes/pkg/kubelet/pluginmanager"
@@ -168,6 +168,9 @@ const (
 
 	// Minimum number of dead containers to keep in a pod
 	minDeadContainerInPod = 1
+
+	// nodeLeaseRenewIntervalFraction is the fraction of lease duration to renew the lease
+	nodeLeaseRenewIntervalFraction = 0.25
 )
 
 // SyncHandler is an interface implemented by Kubelet, for testability
@@ -780,7 +783,17 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewNoNewPrivsAdmitHandler(klet.containerRuntime))
 	klet.softAdmitHandlers.AddPodAdmitHandler(lifecycle.NewProcMountAdmitHandler(klet.containerRuntime))
 
-	klet.nodeLeaseController = nodelease.NewController(klet.clock, klet.heartbeatClient, string(klet.nodeName), kubeCfg.NodeLeaseDurationSeconds, klet.onRepeatedHeartbeatFailure)
+	leaseDuration := time.Duration(kubeCfg.NodeLeaseDurationSeconds) * time.Second
+	renewInterval := time.Duration(float64(leaseDuration) * nodeLeaseRenewIntervalFraction)
+	klet.nodeLeaseController = lease.NewController(
+		klet.clock,
+		klet.heartbeatClient,
+		string(klet.nodeName),
+		kubeCfg.NodeLeaseDurationSeconds,
+		klet.onRepeatedHeartbeatFailure,
+		renewInterval,
+		v1.NamespaceNodeLease,
+		util.SetNodeOwnerFunc(klet.heartbeatClient, string(klet.nodeName)))
 
 	// Finally, put the most recent version of the config on the Kubelet, so
 	// people can see how it was configured.
@@ -986,7 +999,7 @@ type Kubelet struct {
 	updateRuntimeMux sync.Mutex
 
 	// nodeLeaseController claims and renews the node lease for this Kubelet
-	nodeLeaseController nodelease.Controller
+	nodeLeaseController lease.Controller
 
 	// Generates pod events.
 	pleg pleg.PodLifecycleEventGenerator
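For scale: with the kubelet's default NodeLeaseDurationSeconds of 40 (a default recalled from the kubelet config, not shown in this diff), the 0.25 fraction above works out to a 10-second renew interval. A minimal standalone sketch of the same arithmetic:

package main

import (
	"fmt"
	"time"
)

// nodeLeaseRenewIntervalFraction mirrors the constant added to kubelet.go above.
const nodeLeaseRenewIntervalFraction = 0.25

func main() {
	var leaseDurationSeconds int32 = 40 // kubelet default, assumed
	leaseDuration := time.Duration(leaseDurationSeconds) * time.Second
	renewInterval := time.Duration(float64(leaseDuration) * nodeLeaseRenewIntervalFraction)
	fmt.Println(renewInterval) // prints 10s
}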

pkg/kubelet/util/nodelease.go

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+	"context"
+
+	coordinationv1 "k8s.io/api/coordination/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clientset "k8s.io/client-go/kubernetes"
+
+	"k8s.io/klog/v2"
+)
+
+// SetNodeOwnerFunc helps construct a newLeasePostProcessFunc which sets
+// a node OwnerReference to the given lease object
+func SetNodeOwnerFunc(c clientset.Interface, nodeName string) func(lease *coordinationv1.Lease) error {
+	return func(lease *coordinationv1.Lease) error {
+		// Setting owner reference needs node's UID. Note that it is different from
+		// kubelet.nodeRef.UID. When lease is initially created, it is possible that
+		// the connection between master and node is not ready yet. So try to set
+		// owner reference every time when renewing the lease, until successful.
+		if len(lease.OwnerReferences) == 0 {
+			if node, err := c.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}); err == nil {
+				lease.OwnerReferences = []metav1.OwnerReference{
+					{
+						APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+						Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+						Name:       nodeName,
+						UID:        node.UID,
+					},
+				}
+			} else {
+				klog.Errorf("failed to get node %q when trying to set owner ref to the node lease: %v", nodeName, err)
+				return err
+			}
+		}
+		return nil
+	}
+}
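A minimal usage sketch for this helper, assuming a test-style context with client-go's fake clientset; the node name and UID are illustrative:

package main

import (
	"fmt"

	coordinationv1 "k8s.io/api/coordination/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"

	"k8s.io/kubernetes/pkg/kubelet/util"
)

func main() {
	// Fake client pre-seeded with the node whose UID the owner reference needs.
	client := fake.NewSimpleClientset(&corev1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "node-1", UID: "uid-1234"},
	})
	setOwner := util.SetNodeOwnerFunc(client, "node-1")

	lease := &coordinationv1.Lease{
		ObjectMeta: metav1.ObjectMeta{Name: "node-1", Namespace: corev1.NamespaceNodeLease},
	}
	// On success the lease now carries one OwnerReference pointing at node-1;
	// on failure the controller simply retries at the next renewal.
	if err := setOwner(lease); err != nil {
		fmt.Println("owner ref not set yet:", err)
	}
	fmt.Println(lease.OwnerReferences)
}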

staging/src/k8s.io/component-helpers/lease/controller.go

Lines changed: 41 additions & 44 deletions
@@ -14,15 +14,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-package nodelease
+package lease
 
 import (
 	"context"
 	"fmt"
 	"time"
 
 	coordinationv1 "k8s.io/api/coordination/v1"
-	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/clock"
@@ -35,71 +34,80 @@ import (
 )
 
 const (
-	// renewIntervalFraction is the fraction of lease duration to renew the lease
-	renewIntervalFraction = 0.25
-	// maxUpdateRetries is the number of immediate, successive retries the Kubelet will attempt
+	// maxUpdateRetries is the number of immediate, successive retries the controller will attempt
 	// when renewing the lease before it waits for the renewal interval before trying again,
 	// similar to what we do for node status retries
 	maxUpdateRetries = 5
 	// maxBackoff is the maximum sleep time during backoff (e.g. in backoffEnsureLease)
 	maxBackoff = 7 * time.Second
 )
 
-// Controller manages creating and renewing the lease for this Kubelet
+// Controller manages creating and renewing the lease for this component (kube-apiserver, kubelet, etc.)
 type Controller interface {
 	Run(stopCh <-chan struct{})
 }
 
+// ProcessLeaseFunc processes the given lease in-place
+type ProcessLeaseFunc func(*coordinationv1.Lease) error
+
 type controller struct {
 	client                     clientset.Interface
 	leaseClient                coordclientset.LeaseInterface
 	holderIdentity             string
+	leaseNamespace             string
 	leaseDurationSeconds       int32
 	renewInterval              time.Duration
 	clock                      clock.Clock
 	onRepeatedHeartbeatFailure func()
 
-	// latestLease is the latest node lease which Kubelet updated or created
+	// latestLease is the latest lease which the controller updated or created
 	latestLease *coordinationv1.Lease
+
+	// newLeasePostProcessFunc allows customizing a lease object (e.g. setting OwnerReference)
+	// before every time the lease is created/refreshed(updated). Note that an error will block
+	// a lease CREATE, causing the controller to retry next time, but an error won't block a
+	// lease UPDATE.
+	newLeasePostProcessFunc ProcessLeaseFunc
 }
 
 // NewController constructs and returns a controller
-func NewController(clock clock.Clock, client clientset.Interface, holderIdentity string, leaseDurationSeconds int32, onRepeatedHeartbeatFailure func()) Controller {
+func NewController(clock clock.Clock, client clientset.Interface, holderIdentity string, leaseDurationSeconds int32, onRepeatedHeartbeatFailure func(), renewInterval time.Duration, leaseNamespace string, newLeasePostProcessFunc ProcessLeaseFunc) Controller {
 	var leaseClient coordclientset.LeaseInterface
 	if client != nil {
-		leaseClient = client.CoordinationV1().Leases(corev1.NamespaceNodeLease)
+		leaseClient = client.CoordinationV1().Leases(leaseNamespace)
 	}
-	leaseDuration := time.Duration(leaseDurationSeconds) * time.Second
 	return &controller{
 		client:                     client,
 		leaseClient:                leaseClient,
 		holderIdentity:             holderIdentity,
+		leaseNamespace:             leaseNamespace,
 		leaseDurationSeconds:       leaseDurationSeconds,
-		renewInterval:              time.Duration(float64(leaseDuration) * renewIntervalFraction),
+		renewInterval:              renewInterval,
 		clock:                      clock,
 		onRepeatedHeartbeatFailure: onRepeatedHeartbeatFailure,
+		newLeasePostProcessFunc:    newLeasePostProcessFunc,
 	}
 }
 
 // Run runs the controller
 func (c *controller) Run(stopCh <-chan struct{}) {
 	if c.leaseClient == nil {
-		klog.Infof("node lease controller has nil lease client, will not claim or renew leases")
+		klog.Infof("lease controller has nil lease client, will not claim or renew leases")
 		return
 	}
 	wait.Until(c.sync, c.renewInterval, stopCh)
 }
 
 func (c *controller) sync() {
 	if c.latestLease != nil {
-		// As long as node lease is not (or very rarely) updated by any other agent than Kubelet,
+		// As long as the lease is not (or very rarely) updated by any other agent than the component itself,
 		// we can optimistically assume it didn't change since our last update and try updating
 		// based on the version from that time. Thanks to it we avoid GET call and reduce load
 		// on etcd and kube-apiserver.
		// If at some point other agents will also be frequently updating the Lease object, this
 		// can result in performance degradation, because we will end up with calling additional
 		// GET/PUT - at this point this whole "if" should be removed.
-		err := c.retryUpdateLease(c.newLease(c.latestLease))
+		err := c.retryUpdateLease(c.latestLease)
 		if err == nil {
 			return
 		}
@@ -133,7 +141,7 @@ func (c *controller) backoffEnsureLease() (*coordinationv1.Lease, bool) {
 			break
 		}
 		sleep = minDuration(2*sleep, maxBackoff)
-		klog.Errorf("failed to ensure node lease exists, will retry in %v, error: %v", sleep, err)
+		klog.Errorf("failed to ensure lease exists, will retry in %v, error: %v", sleep, err)
 		// backoff wait
 		c.clock.Sleep(sleep)
 	}
@@ -146,11 +154,11 @@ func (c *controller) ensureLease() (*coordinationv1.Lease, bool, error) {
 	lease, err := c.leaseClient.Get(context.TODO(), c.holderIdentity, metav1.GetOptions{})
 	if apierrors.IsNotFound(err) {
 		// lease does not exist, create it.
-		leaseToCreate := c.newLease(nil)
-		if len(leaseToCreate.OwnerReferences) == 0 {
-			// We want to ensure that a lease will always have OwnerReferences set.
-			// Thus, given that we weren't able to set it correctly, we simply
-			// not create it this time - we will retry in the next iteration.
+		leaseToCreate, err := c.newLease(nil)
+		// An error occurred during allocating the new lease (likely from newLeasePostProcessFunc).
+		// Given that we weren't able to set the lease correctly, we simply
+		// not create it this time - we will retry in the next iteration.
+		if err != nil {
 			return nil, false, nil
 		}
 		lease, err := c.leaseClient.Create(context.TODO(), leaseToCreate, metav1.CreateOptions{})
@@ -170,12 +178,13 @@ func (c *controller) ensureLease() (*coordinationv1.Lease, bool, error) {
 // call this once you're sure the lease has been created
 func (c *controller) retryUpdateLease(base *coordinationv1.Lease) error {
 	for i := 0; i < maxUpdateRetries; i++ {
-		lease, err := c.leaseClient.Update(context.TODO(), c.newLease(base), metav1.UpdateOptions{})
+		leaseToUpdate, _ := c.newLease(base)
+		lease, err := c.leaseClient.Update(context.TODO(), leaseToUpdate, metav1.UpdateOptions{})
 		if err == nil {
 			c.latestLease = lease
 			return nil
 		}
-		klog.Errorf("failed to update node lease, error: %v", err)
+		klog.Errorf("failed to update lease, error: %v", err)
 		// OptimisticLockError requires getting the newer version of lease to proceed.
 		if apierrors.IsConflict(err) {
 			base, _ = c.backoffEnsureLease()
@@ -185,20 +194,22 @@ func (c *controller) retryUpdateLease(base *coordinationv1.Lease) error {
 			c.onRepeatedHeartbeatFailure()
 		}
 	}
-	return fmt.Errorf("failed %d attempts to update node lease", maxUpdateRetries)
+	return fmt.Errorf("failed %d attempts to update lease", maxUpdateRetries)
 }
 
 // newLease constructs a new lease if base is nil, or returns a copy of base
 // with desired state asserted on the copy.
-func (c *controller) newLease(base *coordinationv1.Lease) *coordinationv1.Lease {
+// Note that an error will block lease CREATE, causing the CREATE to be retried in
+// the next iteration; but the error won't block lease refresh (UPDATE).
+func (c *controller) newLease(base *coordinationv1.Lease) (*coordinationv1.Lease, error) {
 	// Use the bare minimum set of fields; other fields exist for debugging/legacy,
-	// but we don't need to make node heartbeats more complicated by using them.
+	// but we don't need to make component heartbeats more complicated by using them.
 	var lease *coordinationv1.Lease
 	if base == nil {
 		lease = &coordinationv1.Lease{
 			ObjectMeta: metav1.ObjectMeta{
 				Name:      c.holderIdentity,
-				Namespace: corev1.NamespaceNodeLease,
+				Namespace: c.leaseNamespace,
 			},
 			Spec: coordinationv1.LeaseSpec{
 				HolderIdentity: pointer.StringPtr(c.holderIdentity),
@@ -210,26 +221,12 @@ func (c *controller) newLease(base *coordinationv1.Lease) *coordinationv1.Lease
 	}
 	lease.Spec.RenewTime = &metav1.MicroTime{Time: c.clock.Now()}
 
-	// Setting owner reference needs node's UID. Note that it is different from
-	// kubelet.nodeRef.UID. When lease is initially created, it is possible that
-	// the connection between master and node is not ready yet. So try to set
-	// owner reference every time when renewing the lease, until successful.
-	if len(lease.OwnerReferences) == 0 {
-		if node, err := c.client.CoreV1().Nodes().Get(context.TODO(), c.holderIdentity, metav1.GetOptions{}); err == nil {
-			lease.OwnerReferences = []metav1.OwnerReference{
-				{
-					APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
-					Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
-					Name:       c.holderIdentity,
-					UID:        node.UID,
-				},
-			}
-		} else {
-			klog.Errorf("failed to get node %q when trying to set owner ref to the node lease: %v", c.holderIdentity, err)
-		}
+	if c.newLeasePostProcessFunc != nil {
+		err := c.newLeasePostProcessFunc(lease)
+		return lease, err
 	}
 
-	return lease
+	return lease, nil
 }
 
 func minDuration(a, b time.Duration) time.Duration {
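With the namespace, renew interval, and post-process hook now injected rather than hard-coded, a non-kubelet component can reuse this controller. A hypothetical sketch; the "my-component" identity, the kube-system namespace choice, and the label-setting hook are illustrative, not part of this commit:

package main

import (
	"time"

	coordinationv1 "k8s.io/api/coordination/v1"
	"k8s.io/apimachinery/pkg/util/clock"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/component-helpers/lease"
)

func startLeaseController(cfg *rest.Config, stopCh <-chan struct{}) error {
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return err
	}
	c := lease.NewController(
		clock.RealClock{},
		client,
		"my-component", // holderIdentity (hypothetical)
		40,             // leaseDurationSeconds
		nil,            // onRepeatedHeartbeatFailure: no callback
		10*time.Second, // renewInterval: a quarter of the lease duration
		"kube-system",  // leaseNamespace (hypothetical choice)
		func(l *coordinationv1.Lease) error {
			// Post-process hook: tag the lease so it is easy to find (illustrative).
			if l.Labels == nil {
				l.Labels = map[string]string{}
			}
			l.Labels["component"] = "my-component"
			return nil
		},
	)
	go c.Run(stopCh)
	return nil
}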
