Skip to content

Commit 7d28dde

Browse files
author
Xuewei Zhang
committed
Add e2e test for OOM kill and Docker hung
Also fixes two minor bugs: 1. Change the default Boskos wait timeout from 5 minutes to 2 minutes. The current test timeout is configured to 10 minutes; running each test case takes 1-2 minutes, and each node will run 1-2 test cases. A 5-minute timeout on waiting for Boskos may therefore cause a test timeout, which we want to avoid. 2. Create the artifact subdirectory with mode 0755 rather than 0644, because the execute bit should be set on directories.
1 parent 8b98d08 commit 7d28dde

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

test/e2e/metriconly/e2e_npd_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var boskosProjectType = flag.String("boskos-project-type", "gce-project",
4949
"specifies which project type to select from Boskos.")
5050
var boskosServerURL = flag.String("boskos-server-url", "http://boskos.test-pods.svc.cluster.local",
5151
"specifies Boskos server URL.")
52-
var boskosWaitDuration = flag.Duration("boskos-wait-duration", 5*time.Minute,
52+
var boskosWaitDuration = flag.Duration("boskos-wait-duration", 2*time.Minute,
5353
"Duration to wait before quitting getting Boskos resource.")
5454

5555
var computeService *compute.Service

test/e2e/metriconly/metrics_test.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,32 @@ var _ = ginkgo.Describe("NPD should export Prometheus metrics.", func() {
127127
})
128128
})
129129

130+
ginkgo.Context("When OOM kills and docker hung happen", func() {
131+
132+
ginkgo.BeforeEach(func() {
133+
err := npd.WaitForNPD(instance, []string{"problem_gauge"}, 120)
134+
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Expect NPD to become ready in 120s, but hit error: %v", err))
135+
instance.RunCommandOrFail("sudo /home/kubernetes/bin/problem-maker --problem OOMKill")
136+
instance.RunCommandOrFail("sudo /home/kubernetes/bin/problem-maker --problem DockerHung")
137+
})
138+
139+
ginkgo.It("NPD should update problem_counter and problem_gauge", func() {
140+
time.Sleep(5 * time.Second)
141+
assertMetricValueInBound(instance,
142+
"problem_counter", map[string]string{"reason": "DockerHung"},
143+
1.0, 1.0)
144+
assertMetricValueInBound(instance,
145+
"problem_counter", map[string]string{"reason": "TaskHung"},
146+
1.0, 1.0)
147+
assertMetricValueInBound(instance,
148+
"problem_gauge", map[string]string{"reason": "DockerHung", "type": "KernelDeadlock"},
149+
1.0, 1.0)
150+
assertMetricValueInBound(instance,
151+
"problem_counter", map[string]string{"reason": "OOMKilling"},
152+
1.0, 1.0)
153+
})
154+
})
155+
130156
ginkgo.AfterEach(func() {
131157
defer func() {
132158
err := instance.DeleteInstance()
@@ -139,7 +165,7 @@ var _ = ginkgo.Describe("NPD should export Prometheus metrics.", func() {
139165
testSubdirName := strings.Replace(testText, " ", "_", -1)
140166

141167
artifactSubDir = path.Join(*artifactsDir, testSubdirName)
142-
err := os.MkdirAll(artifactSubDir, os.ModeDir|0644)
168+
err := os.MkdirAll(artifactSubDir, os.ModeDir|0755)
143169
if err != nil {
144170
fmt.Printf("Failed to create sub-directory to hold test artiface for test %s at %s\n",
145171
testText, artifactSubDir)

0 commit comments

Comments
 (0)