Skip to content

Commit 1003d36

Browse files
committed
Add random interval to nodeStatusReport interval every time after an actual node status change
update TestUpdateNodeStatusWithLease this time to avoid flakiness
1 parent 3184eb3 commit 1003d36

File tree

3 files changed

+100
-4
lines changed

3 files changed

+100
-4
lines changed

pkg/kubelet/kubelet.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,12 @@ type Kubelet struct {
12401240
// status to master. It is only used when node lease feature is enabled.
12411241
nodeStatusReportFrequency time.Duration
12421242

1243+
// delayAfterNodeStatusChange is the one-time random duration that we add to the next node status report interval
1244+
// every time there's an actual node status change. But all future node status updates that are not caused by
1245+
// real status change will stick with nodeStatusReportFrequency. The random duration is a uniform distribution over
1246+
// [-0.5*nodeStatusReportFrequency, 0.5*nodeStatusReportFrequency]
1247+
delayAfterNodeStatusChange time.Duration
1248+
12431249
// lastStatusReportTime is the time when node status was last reported.
12441250
lastStatusReportTime time.Time
12451251

pkg/kubelet/kubelet_node_status.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package kubelet
1919
import (
2020
"context"
2121
"fmt"
22+
"math/rand"
2223
"net"
2324
goruntime "runtime"
2425
"sort"
@@ -579,20 +580,42 @@ func (kl *Kubelet) tryUpdateNodeStatus(ctx context.Context, tryNumber int) error
579580
}
580581

581582
node, changed := kl.updateNode(ctx, originalNode)
582-
shouldPatchNodeStatus := changed || kl.clock.Since(kl.lastStatusReportTime) >= kl.nodeStatusReportFrequency
583-
584-
if !shouldPatchNodeStatus {
583+
// no need to update the status yet
584+
if !changed && !kl.isUpdateStatusPeriodExperid() {
585585
kl.markVolumesFromNode(node)
586586
return nil
587587
}
588588

589+
// We need to update the node status, if this is caused by a node change we want to calculate a new
590+
// random delay so that not all nodes reach the apiserver at the same time. If the update is not related
591+
// to a node change but happens because the report period elapsed, we reset the random delay so the node keeps updating
592+
// its status at the same cadence
593+
if changed {
594+
kl.delayAfterNodeStatusChange = kl.calculateDelay()
595+
} else {
596+
kl.delayAfterNodeStatusChange = 0
597+
}
589598
updatedNode, err := kl.patchNodeStatus(originalNode, node)
590599
if err == nil {
591600
kl.markVolumesFromNode(updatedNode)
592601
}
593602
return err
594603
}
595604

605+
func (kl *Kubelet) isUpdateStatusPeriodExperid() bool {
606+
if kl.lastStatusReportTime.IsZero() {
607+
return false
608+
}
609+
if kl.clock.Since(kl.lastStatusReportTime) >= kl.nodeStatusReportFrequency+kl.delayAfterNodeStatusChange {
610+
return true
611+
}
612+
return false
613+
}
614+
615+
func (kl *Kubelet) calculateDelay() time.Duration {
616+
return time.Duration(float64(kl.nodeStatusReportFrequency) * (-0.5 + rand.Float64()))
617+
}
618+
596619
// updateNode creates a copy of originalNode and runs update logic on it.
597620
// It returns the updated node object and a bool indicating if anything has been changed.
598621
func (kl *Kubelet) updateNode(ctx context.Context, originalNode *v1.Node) (*v1.Node, bool) {

pkg/kubelet/kubelet_node_status_test.go

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,10 @@ func TestUpdateNodeStatusWithLease(t *testing.T) {
849849
// Since this test retroactively overrides the stub container manager,
850850
// we have to regenerate default status setters.
851851
kubelet.setNodeStatusFuncs = kubelet.defaultNodeStatusFuncs()
852-
kubelet.nodeStatusReportFrequency = time.Minute
852+
// The random delay adds up to 50% of nodeStatusReportFrequency of additional latency for
853+
// kubelet to determine if update node status is needed due to time passage. We need to
854+
// take that into consideration to ensure this test passes every time.
855+
kubelet.nodeStatusReportFrequency = 30 * time.Second
853856

854857
kubeClient := testKubelet.fakeKubeClient
855858
existingNode := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}
@@ -3088,3 +3091,67 @@ func TestUpdateNodeAddresses(t *testing.T) {
30883091
})
30893092
}
30903093
}
3094+
3095+
func TestIsUpdateStatusPeriodExperid(t *testing.T) {
3096+
testcases := []struct {
3097+
name string
3098+
lastStatusReportTime time.Time
3099+
delayAfterNodeStatusChange time.Duration
3100+
expectExpired bool
3101+
}{
3102+
{
3103+
name: "no status update before and no delay",
3104+
lastStatusReportTime: time.Time{},
3105+
delayAfterNodeStatusChange: 0,
3106+
expectExpired: false,
3107+
},
3108+
{
3109+
name: "no status update before and existing delay",
3110+
lastStatusReportTime: time.Time{},
3111+
delayAfterNodeStatusChange: 30 * time.Second,
3112+
expectExpired: false,
3113+
},
3114+
{
3115+
name: "not expired and no delay",
3116+
lastStatusReportTime: time.Now().Add(-4 * time.Minute),
3117+
delayAfterNodeStatusChange: 0,
3118+
expectExpired: false,
3119+
},
3120+
{
3121+
name: "not expired",
3122+
lastStatusReportTime: time.Now().Add(-5 * time.Minute),
3123+
delayAfterNodeStatusChange: time.Minute,
3124+
expectExpired: false,
3125+
},
3126+
{
3127+
name: "expired",
3128+
lastStatusReportTime: time.Now().Add(-4 * time.Minute),
3129+
delayAfterNodeStatusChange: -2 * time.Minute,
3130+
expectExpired: true,
3131+
},
3132+
}
3133+
3134+
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
3135+
defer testKubelet.Cleanup()
3136+
kubelet := testKubelet.kubelet
3137+
kubelet.nodeStatusReportFrequency = 5 * time.Minute
3138+
3139+
for _, tc := range testcases {
3140+
kubelet.lastStatusReportTime = tc.lastStatusReportTime
3141+
kubelet.delayAfterNodeStatusChange = tc.delayAfterNodeStatusChange
3142+
expired := kubelet.isUpdateStatusPeriodExperid()
3143+
assert.Equal(t, tc.expectExpired, expired, tc.name)
3144+
}
3145+
}
3146+
3147+
func TestCalculateDelay(t *testing.T) {
3148+
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
3149+
defer testKubelet.Cleanup()
3150+
kubelet := testKubelet.kubelet
3151+
kubelet.nodeStatusReportFrequency = 5 * time.Minute
3152+
3153+
for i := 0; i < 100; i++ {
3154+
randomDelay := kubelet.calculateDelay()
3155+
assert.LessOrEqual(t, randomDelay.Abs(), kubelet.nodeStatusReportFrequency/2)
3156+
}
3157+
}

0 commit comments

Comments
 (0)