Skip to content

Commit cc87438

Browse files
ffromani and haircommander
authored and committed
e2e_node: add a test to verify the kubelet starts
with systemd cgroup driver and cpumanager none policy. This was originally planned to be a correctness check for https://issues.k8s.io/125923, but it was difficult to reproduce the bug, so it's now a regression test against it. Signed-off-by: Francesco Romani <[email protected]> Signed-off-by: Peter Hunt <[email protected]>
1 parent 77d03e4 commit cc87438

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed

test/e2e_node/node_container_manager_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,82 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
7676
framework.ExpectNoError(runTest(ctx, f))
7777
})
7878
})
79+
f.Describe("Validate CGroup management", func() {
80+
// Regression test for https://issues.k8s.io/125923
81+
// In this issue there's a race involved with systemd which seems to manifest most likely, or perhaps only
82+
// (data gathered so far seems inconclusive) on the very first boot of the machine, so restarting the kubelet
83+
// does not seem sufficient. OTOH, the exact reproducer seems to require a dedicated lane with only this test, or
84+
// to reboot the machine before running this test. Both are practically unrealistic in CI.
85+
// The closest approximation is this test in this current form, using a kubelet restart. This at least
86+
// acts as a non-regression test, so it still brings value.
87+
ginkgo.It("should correctly start with cpumanager none policy in use with systemd", func(ctx context.Context) {
88+
if !IsCgroup2UnifiedMode() {
89+
ginkgo.Skip("this test requires cgroups v2")
90+
}
91+
92+
var err error
93+
var oldCfg *kubeletconfig.KubeletConfiguration
94+
// Get current kubelet configuration
95+
oldCfg, err = getCurrentKubeletConfig(ctx)
96+
framework.ExpectNoError(err)
97+
98+
ginkgo.DeferCleanup(func(ctx context.Context) {
99+
if oldCfg != nil {
100+
// Restore the original kubelet configuration.
101+
ginkgo.By("Stopping the kubelet")
102+
startKubelet := stopKubelet()
103+
104+
// wait until the kubelet health check fails
105+
gomega.Eventually(ctx, func() bool {
106+
return kubeletHealthCheck(kubeletHealthCheckURL)
107+
}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
108+
ginkgo.By("Stopped the kubelet")
109+
110+
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
111+
112+
ginkgo.By("Starting the kubelet")
113+
startKubelet()
114+
115+
// wait until the kubelet health check succeeds
116+
gomega.Eventually(ctx, func(ctx context.Context) bool {
117+
return kubeletHealthCheck(kubeletHealthCheckURL)
118+
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
119+
ginkgo.By("Started the kubelet")
120+
}
121+
})
122+
123+
newCfg := oldCfg.DeepCopy()
124+
// Change existing kubelet configuration
125+
newCfg.CPUManagerPolicy = "none"
126+
newCfg.CgroupDriver = "systemd"
127+
newCfg.FailCgroupV1 = true // extra safety. We want to avoid false negatives though, so we added the skip check earlier
128+
129+
// Update the Kubelet configuration.
130+
ginkgo.By("Stopping the kubelet")
131+
startKubelet := stopKubelet()
132+
133+
// wait until the kubelet health check fails
134+
gomega.Eventually(ctx, func() bool {
135+
return kubeletHealthCheck(kubeletHealthCheckURL)
136+
}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
137+
ginkgo.By("Stopped the kubelet")
138+
139+
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
140+
141+
ginkgo.By("Starting the kubelet")
142+
startKubelet()
143+
144+
// wait until the node reports ready and the kubelet health check succeeds
145+
gomega.Eventually(ctx, func() bool {
146+
return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
147+
}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
148+
ginkgo.By("Started the kubelet")
149+
150+
gomega.Consistently(ctx, func(ctx context.Context) bool {
151+
return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
152+
}).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(gomega.BeTrueBecause("node keeps reporting ready status"))
153+
})
154+
})
79155
})
80156

81157
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {

0 commit comments

Comments
 (0)