
Commit 45a243e

Add node serial e2e tests that simulate the kubelet restart
This adds node e2e tests to make sure a completed init container is not restarted due to the kubelet restart.
1 parent: 77e12ae

2 files changed: 353 additions, 1 deletion

test/e2e_node/container_lifecycle_pod_construction.go

Lines changed: 2 additions & 1 deletion
@@ -351,12 +351,13 @@ func parseOutput(ctx context.Context, f *framework.Framework, pod *v1.Pod) containerOutputList
 	sc := bufio.NewScanner(&buf)
 	var res containerOutputList
 	for sc.Scan() {
+		log := sc.Text()
 		fields := strings.Fields(sc.Text())
 		if len(fields) < 3 {
 			framework.ExpectNoError(fmt.Errorf("%v should have at least length 3", fields))
 		}
 		timestamp, err := time.Parse(time.RFC3339, fields[0])
-		framework.ExpectNoError(err)
+		framework.ExpectNoError(err, "Failed to parse the timestamp, log: %q", log)
 		res = append(res, containerOutput{
 			timestamp:     timestamp,
 			containerName: fields[1],
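For context, parseOutput treats each line of captured container output as an RFC3339 timestamp, the container name, and at least one further field; the change above threads the raw line into the error message so a malformed line shows up in the failure report. A minimal standalone sketch of that parsing contract (the sample log line is invented for illustration, and the panic calls stand in for the framework's assertion helpers):

package main

import (
	"fmt"
	"strings"
	"time"
)

func main() {
	// A hypothetical line in the shape parseOutput expects:
	// RFC3339 timestamp, container name, then at least one more field.
	log := "2024-01-02T15:04:05Z init-1 Starting"

	fields := strings.Fields(log)
	if len(fields) < 3 {
		panic(fmt.Errorf("%v should have at least length 3", fields))
	}
	timestamp, err := time.Parse(time.RFC3339, fields[0])
	if err != nil {
		// This mirrors the improved assertion: the offending line is
		// quoted in the error instead of being lost.
		panic(fmt.Errorf("failed to parse the timestamp, log: %q: %w", log, err))
	}
	fmt.Println(timestamp, fields[1])
}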

test/e2e_node/container_lifecycle_test.go

Lines changed: 351 additions & 0 deletions
@@ -19,6 +19,7 @@ package e2enode
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"
 
 	"github.com/onsi/ginkgo/v2"
@@ -1047,6 +1048,356 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
 		framework.ExpectNoError(init2Restarted.IsBefore(init3Restarted))
 		framework.ExpectNoError(init3Restarted.IsBefore(regular1Restarted))
 	})
+
+	ginkgo.When("a Pod is initialized and running", func() {
+		var client *e2epod.PodClient
+		var err error
+		var pod *v1.Pod
+		init1 := "init-1"
+		init2 := "init-2"
+		init3 := "init-3"
+		regular1 := "regular-1"
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			pod = &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "initialized-pod",
+				},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyAlways,
+					InitContainers: []v1.Container{
+						{
+							Name:  init1,
+							Image: busyboxImage,
+							Command: ExecCommand(init1, execCommand{
+								Delay:    1,
+								ExitCode: 0,
+							}),
+						},
+						{
+							Name:  init2,
+							Image: busyboxImage,
+							Command: ExecCommand(init2, execCommand{
+								Delay:    1,
+								ExitCode: 0,
+							}),
+						},
+						{
+							Name:  init3,
+							Image: busyboxImage,
+							Command: ExecCommand(init3, execCommand{
+								Delay:    1,
+								ExitCode: 0,
+							}),
+						},
+					},
+					Containers: []v1.Container{
+						{
+							Name:  regular1,
+							Image: busyboxImage,
+							Command: ExecCommand(regular1, execCommand{
+								Delay:    300,
+								ExitCode: 0,
+							}),
+						},
+					},
+				},
+			}
+			preparePod(pod)
+
+			client = e2epod.NewPodClient(f)
+			pod = client.Create(ctx, pod)
+			ginkgo.By("Waiting for the pod to be initialized and run")
+			err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
+			framework.ExpectNoError(err)
+		})
+
+		ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
+			ginkgo.By("stopping the kubelet")
+			startKubelet := stopKubelet()
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
+
+			ginkgo.By("restarting the kubelet")
+			startKubelet()
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
+
+			ginkgo.By("ensuring that no completed init container is restarted")
+			gomega.Consistently(ctx, func() bool {
+				pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				for _, status := range pod.Status.InitContainerStatuses {
+					if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
+						continue
+					}
+
+					if status.RestartCount > 0 {
+						return false
+					}
+				}
+				return true
+			}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
+
+			ginkgo.By("Parsing results")
+			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+			results := parseOutput(ctx, f, pod)
+
+			ginkgo.By("Analyzing results")
+			framework.ExpectNoError(results.StartsBefore(init1, init2))
+			framework.ExpectNoError(results.ExitsBefore(init1, init2))
+
+			framework.ExpectNoError(results.StartsBefore(init2, init3))
+			framework.ExpectNoError(results.ExitsBefore(init2, init3))
+
+			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[2].RestartCount).To(gomega.Equal(int32(0)))
+		})
+
+		ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
+			ginkgo.By("stopping the kubelet")
+			startKubelet := stopKubelet()
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
+
+			ginkgo.By("removing the completed init container statuses from the container runtime")
+			rs, _, err := getCRIClient()
+			framework.ExpectNoError(err)
+
+			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+
+			for _, c := range pod.Status.InitContainerStatuses {
+				if c.State.Terminated == nil || c.State.Terminated.ExitCode != 0 {
+					continue
+				}
+
+				tokens := strings.Split(c.ContainerID, "://")
+				gomega.Expect(tokens).To(gomega.HaveLen(2))
+
+				containerID := tokens[1]
+
+				err := rs.RemoveContainer(ctx, containerID)
+				framework.ExpectNoError(err)
+			}
+
+			ginkgo.By("restarting the kubelet")
+			startKubelet()
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
+
+			ginkgo.By("ensuring that no completed init container is restarted")
+			gomega.Consistently(ctx, func() bool {
+				pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				for _, status := range pod.Status.InitContainerStatuses {
+					if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
+						continue
+					}
+
+					if status.RestartCount > 0 {
+						return false
+					}
+				}
+				return true
+			}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
+
+			ginkgo.By("Analyzing results")
+			// Cannot analyze the results with the container logs as the
+			// container statuses have been removed from container runtime.
+			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[2].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.ContainerStatuses[0].State.Running).ToNot(gomega.BeNil())
+		})
+	})
+
+	ginkgo.When("a Pod is initializing the long-running init container", func() {
+		var client *e2epod.PodClient
+		var err error
+		var pod *v1.Pod
+		init1 := "init-1"
+		init2 := "init-2"
+		longRunningInit3 := "long-running-init-3"
+		regular1 := "regular-1"
+
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			pod = &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "initializing-long-running-init-container",
+				},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyAlways,
+					InitContainers: []v1.Container{
+						{
+							Name:  init1,
+							Image: busyboxImage,
+							Command: ExecCommand(init1, execCommand{
+								Delay:    1,
+								ExitCode: 0,
+							}),
+						},
+						{
+							Name:  init2,
+							Image: busyboxImage,
+							Command: ExecCommand(init2, execCommand{
+								Delay:    1,
+								ExitCode: 0,
+							}),
+						},
+						{
+							Name:  longRunningInit3,
+							Image: busyboxImage,
+							Command: ExecCommand(longRunningInit3, execCommand{
+								Delay:    300,
+								ExitCode: 0,
+							}),
+						},
+					},
+					Containers: []v1.Container{
+						{
+							Name:  regular1,
+							Image: busyboxImage,
+							Command: ExecCommand(regular1, execCommand{
+								Delay:    300,
+								ExitCode: 0,
+							}),
+						},
+					},
+				},
+			}
+			preparePod(pod)
+
+			client = e2epod.NewPodClient(f)
+			pod = client.Create(ctx, pod)
+			ginkgo.By("Waiting for the pod to be initializing the long-running init container")
+			err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "long-running init container initializing", 1*time.Minute, func(pod *v1.Pod) (bool, error) {
+				for _, c := range pod.Status.InitContainerStatuses {
+					if c.Name != longRunningInit3 {
+						continue
+					}
+					if c.State.Running != nil && (c.Started != nil && *c.Started == true) {
+						return true, nil
+					}
+				}
+				return false, nil
+			})
+			framework.ExpectNoError(err)
+		})
+
+		ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
+			ginkgo.By("stopping the kubelet")
+			startKubelet := stopKubelet()
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
+
+			ginkgo.By("restarting the kubelet")
+			startKubelet()
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
+
+			ginkgo.By("ensuring that no completed init container is restarted")
+			gomega.Consistently(ctx, func() bool {
+				pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				for _, status := range pod.Status.InitContainerStatuses {
+					if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
+						continue
+					}
+
+					if status.RestartCount > 0 {
+						return false
+					}
+				}
+				return true
+			}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
+
+			ginkgo.By("Parsing results")
+			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+			results := parseOutput(ctx, f, pod)
+
+			ginkgo.By("Analyzing results")
+			framework.ExpectNoError(results.StartsBefore(init1, init2))
+			framework.ExpectNoError(results.ExitsBefore(init1, init2))
+
+			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
+		})
+
+		ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
+			ginkgo.By("stopping the kubelet")
+			startKubelet := stopKubelet()
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
+
+			ginkgo.By("removing the completed init container statuses from the container runtime")
+			rs, _, err := getCRIClient()
+			framework.ExpectNoError(err)
+
+			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+
+			for _, c := range pod.Status.InitContainerStatuses {
+				if c.State.Terminated == nil || c.State.Terminated.ExitCode != 0 {
+					continue
+				}
+
+				tokens := strings.Split(c.ContainerID, "://")
+				gomega.Expect(tokens).To(gomega.HaveLen(2))
+
+				containerID := tokens[1]
+
+				err := rs.RemoveContainer(ctx, containerID)
+				framework.ExpectNoError(err)
+			}
+
+			ginkgo.By("restarting the kubelet")
+			startKubelet()
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
+
+			ginkgo.By("ensuring that no completed init container is restarted")
+			gomega.Consistently(ctx, func() bool {
+				pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
+				framework.ExpectNoError(err)
+				for _, status := range pod.Status.InitContainerStatuses {
+					if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
+						continue
+					}
+
+					if status.RestartCount > 0 {
+						return false
+					}
+				}
+				return true
+			}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
+
+			ginkgo.By("Analyzing results")
+			// Cannot analyze the results with the container logs as the
+			// container statuses have been removed from container runtime.
+			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
+			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
+		})
+	})
 })
 
 var _ = SIGDescribe(nodefeature.SidecarContainers, "Containers Lifecycle", func() {
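Both test groups simulate a node restart the same way: stopKubelet() returns a closure that brings the kubelet back up, and kubeletHealthCheck(kubeletHealthCheckURL) is polled until it reports the expected state. A rough standalone sketch of that polling step, assuming the kubelet's conventional healthz endpoint at http://localhost:10248/healthz (the helper names and URL here are illustrative, not the framework's actual implementation):

package main

import (
	"context"
	"fmt"
	"net/http"
	"time"
)

// healthy reports whether the health endpoint answers 200 OK, roughly what
// the framework's kubeletHealthCheck helper does.
func healthy(url string) bool {
	client := &http.Client{Timeout: 2 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// waitForHealth polls until healthy(url) == want or the context expires,
// mirroring the gomega.Eventually blocks in the tests above.
func waitForHealth(ctx context.Context, url string, want bool) error {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	for {
		if healthy(url) == want {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for healthy=%v: %w", want, ctx.Err())
		case <-ticker.C:
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	// After stopping the kubelet the probe should start failing; after
	// restarting it the probe should succeed again.
	if err := waitForHealth(ctx, "http://localhost:10248/healthz", false); err != nil {
		fmt.Println(err)
	}
}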

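One detail the status-removal tests depend on: a v1.ContainerStatus.ContainerID is reported as "<type>://<container_id>", so the runtime-level ID has to be split off before it can be passed to the CRI RemoveContainer call. A minimal sketch of that extraction (the sample ID is made up):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Kubernetes reports container IDs as "<type>://<container_id>".
	containerID := "containerd://4f1b6a0c9d2e"

	tokens := strings.Split(containerID, "://")
	if len(tokens) != 2 {
		panic(fmt.Sprintf("unexpected ContainerID format: %q", containerID))
	}
	// tokens[1] is the runtime-level ID handed to RemoveContainer.
	fmt.Println(tokens[1])
}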