Skip to content

Commit 198ec57

Browse files
authored
Merge pull request kubernetes#128394 from mengqiy/spreadkubeletlaod
add randomness to nodeStatusReportFrequency for kubelet
2 parents 432a9af + d6e17ad commit 198ec57

File tree

3 files changed

+96
-3
lines changed

3 files changed

+96
-3
lines changed

pkg/kubelet/kubelet.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,12 @@ type Kubelet struct {
12141214
// status to master. It is only used when node lease feature is enabled.
12151215
nodeStatusReportFrequency time.Duration
12161216

1217+
// delayAfterNodeStatusChange is the one-time random duration that we add to the next node status report interval
1218+
// every time there's an actual node status change. All future node status updates that are not caused by a
1219+
// real status change will stick with nodeStatusReportFrequency. The random duration is uniformly distributed over
1220+
// [-0.5*nodeStatusReportFrequency, 0.5*nodeStatusReportFrequency).
1221+
delayAfterNodeStatusChange time.Duration
1222+
12171223
// lastStatusReportTime is the time when node status was last reported.
12181224
lastStatusReportTime time.Time
12191225

pkg/kubelet/kubelet_node_status.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package kubelet
1919
import (
2020
"context"
2121
"fmt"
22+
"math/rand"
2223
"net"
2324
goruntime "runtime"
2425
"sort"
@@ -579,20 +580,42 @@ func (kl *Kubelet) tryUpdateNodeStatus(ctx context.Context, tryNumber int) error
579580
}
580581

581582
node, changed := kl.updateNode(ctx, originalNode)
582-
shouldPatchNodeStatus := changed || kl.clock.Since(kl.lastStatusReportTime) >= kl.nodeStatusReportFrequency
583-
584-
if !shouldPatchNodeStatus {
583+
// no need to update the status yet
584+
if !changed && !kl.isUpdateStatusPeriodExperid() {
585585
kl.markVolumesFromNode(node)
586586
return nil
587587
}
588588

589+
// We need to update the node status. If this is caused by a node change, we calculate a new
590+
// random delay so that not all nodes reach the apiserver at the same time. If the update is not related
591+
// to a node change, i.e. the report period has elapsed, we reset the random delay so the node keeps updating
592+
// its status at the same cadence.
593+
if changed {
594+
kl.delayAfterNodeStatusChange = kl.calculateDelay()
595+
} else {
596+
kl.delayAfterNodeStatusChange = 0
597+
}
589598
updatedNode, err := kl.patchNodeStatus(originalNode, node)
590599
if err == nil {
591600
kl.markVolumesFromNode(updatedNode)
592601
}
593602
return err
594603
}
595604

605+
// isUpdateStatusPeriodExperid reports whether the periodic node status report is due.
// It returns false when lastStatusReportTime is the zero time. Otherwise it returns true once the
// time elapsed since lastStatusReportTime (measured with kl.clock) is at least
// nodeStatusReportFrequency + delayAfterNodeStatusChange.
// NOTE(review): "Experid" looks like a misspelling of "Expired"; a rename would have to touch every caller.
func (kl *Kubelet) isUpdateStatusPeriodExperid() bool {
606+
if kl.lastStatusReportTime.IsZero() {
607+
return false
608+
}
609+
// delayAfterNodeStatusChange may be negative (see calculateDelay), which shortens the effective period.
if kl.clock.Since(kl.lastStatusReportTime) >= kl.nodeStatusReportFrequency+kl.delayAfterNodeStatusChange {
610+
return true
611+
}
612+
return false
613+
}
614+
615+
// calculateDelay returns a random duration equal to nodeStatusReportFrequency scaled by a factor
// of (-0.5 + rand.Float64()). Because rand.Float64 yields values in [0.0, 1.0), the result lies in
// [-0.5*nodeStatusReportFrequency, 0.5*nodeStatusReportFrequency).
func (kl *Kubelet) calculateDelay() time.Duration {
616+
return time.Duration(float64(kl.nodeStatusReportFrequency) * (-0.5 + rand.Float64()))
617+
}
618+
596619
// updateNode creates a copy of originalNode and runs update logic on it.
597620
// It returns the updated node object and a bool indicating if anything has been changed.
598621
func (kl *Kubelet) updateNode(ctx context.Context, originalNode *v1.Node) (*v1.Node, bool) {

pkg/kubelet/kubelet_node_status_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3088,3 +3088,67 @@ func TestUpdateNodeAddresses(t *testing.T) {
30883088
})
30893089
}
30903090
}
3091+
3092+
// TestIsUpdateStatusPeriodExperid is a table-driven test for isUpdateStatusPeriodExperid. Each case sets
// lastStatusReportTime and delayAfterNodeStatusChange on a test kubelet whose nodeStatusReportFrequency
// is 5 minutes, then checks the returned value against expectExpired.
// NOTE(review): the cases derive lastStatusReportTime from time.Now() while the method under test reads
// kl.clock; confirm the test kubelet's clock is consistent with wall-clock time.
func TestIsUpdateStatusPeriodExperid(t *testing.T) {
3093+
testcases := []struct {
3094+
name string
3095+
lastStatusReportTime time.Time
3096+
delayAfterNodeStatusChange time.Duration
3097+
expectExpired bool
3098+
}{
3099+
{
3100+
name: "no status update before and no delay",
3101+
lastStatusReportTime: time.Time{},
3102+
delayAfterNodeStatusChange: 0,
3103+
expectExpired: false,
3104+
},
3105+
{
3106+
name: "no status update before and existing delay",
3107+
lastStatusReportTime: time.Time{},
3108+
delayAfterNodeStatusChange: 30 * time.Second,
3109+
expectExpired: false,
3110+
},
3111+
{
3112+
name: "not expired and no delay",
3113+
lastStatusReportTime: time.Now().Add(-4 * time.Minute),
3114+
delayAfterNodeStatusChange: 0,
3115+
expectExpired: false,
3116+
},
3117+
{
3118+
// 5 minutes elapsed against a 5m + 1m = 6m threshold.
name: "not expired",
3119+
lastStatusReportTime: time.Now().Add(-5 * time.Minute),
3120+
delayAfterNodeStatusChange: time.Minute,
3121+
expectExpired: false,
3122+
},
3123+
{
3124+
// 4 minutes elapsed against a 5m - 2m = 3m threshold.
name: "expired",
3125+
lastStatusReportTime: time.Now().Add(-4 * time.Minute),
3126+
delayAfterNodeStatusChange: -2 * time.Minute,
3127+
expectExpired: true,
3128+
},
3129+
}
3130+
3131+
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
3132+
defer testKubelet.Cleanup()
3133+
kubelet := testKubelet.kubelet
3134+
kubelet.nodeStatusReportFrequency = 5 * time.Minute
3135+
3136+
// All cases share one kubelet; both fields read by the method are overwritten on every iteration.
for _, tc := range testcases {
3137+
kubelet.lastStatusReportTime = tc.lastStatusReportTime
3138+
kubelet.delayAfterNodeStatusChange = tc.delayAfterNodeStatusChange
3139+
expired := kubelet.isUpdateStatusPeriodExperid()
3140+
assert.Equal(t, tc.expectExpired, expired, tc.name)
3141+
}
3142+
}
3143+
3144+
// TestCalculateDelay calls calculateDelay 100 times on a test kubelet whose nodeStatusReportFrequency is
// 5 minutes and asserts that the absolute value of every result is at most half of that frequency.
func TestCalculateDelay(t *testing.T) {
3145+
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
3146+
defer testKubelet.Cleanup()
3147+
kubelet := testKubelet.kubelet
3148+
kubelet.nodeStatusReportFrequency = 5 * time.Minute
3149+
3150+
// The delay is random, so the bound is checked over repeated samples rather than a single call.
for i := 0; i < 100; i++ {
3151+
randomDelay := kubelet.calculateDelay()
3152+
assert.LessOrEqual(t, randomDelay.Abs(), kubelet.nodeStatusReportFrequency/2)
3153+
}
3154+
}

0 commit comments

Comments
 (0)