Skip to content

Commit aa9a9e8

Browse files
committed
Stop StartupProbe explicitly when successThreshold is reached
1 parent 1e827f4 commit aa9a9e8

File tree

3 files changed

+60
-2
lines changed

3 files changed

+60
-2
lines changed

pkg/kubelet/prober/common_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ func getTestRunningStatus() v1.PodStatus {
4646
return getTestRunningStatusWithStarted(true)
4747
}
4848

49+
func getTestNotRunningStatus() v1.PodStatus {
50+
return getTestRunningStatusWithStarted(false)
51+
}
52+
4953
func getTestRunningStatusWithStarted(started bool) v1.PodStatus {
5054
containerStatus := v1.ContainerStatus{
5155
Name: testContainerName,

pkg/kubelet/prober/worker.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,11 +316,14 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
316316

317317
w.resultsManager.Set(w.containerID, result, w.pod)
318318

319-
if (w.probeType == liveness || w.probeType == startup) && result == results.Failure {
319+
if (w.probeType == liveness && result == results.Failure) || w.probeType == startup {
320320
// The container fails a liveness/startup check, it will need to be restarted.
321321
// Stop probing until we see a new container ID. This is to reduce the
322322
// chance of hitting #21751, where running `docker exec` when a
323323
// container is being stopped may lead to corrupted container state.
324+
// In addition, if the container passes a startup probe, we should stop probing
325+
// until the container is restarted.
326+
// This is to prevent extra Probe executions #117153.
324327
w.onHold = true
325328
w.resultRun = 0
326329
}

pkg/kubelet/prober/worker_test.go

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,48 @@ func TestSuccessThreshold(t *testing.T) {
268268
}
269269
}
270270

271+
func TestStartupProbeSuccessThreshold(t *testing.T) {
272+
ctx := context.Background()
273+
m := newTestManager()
274+
successThreshold := 1
275+
failureThreshold := 3
276+
w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
277+
m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
278+
279+
m.prober.exec = fakeExecProber{probe.Success, nil}
280+
281+
for i := 0; i < successThreshold+1; i++ {
282+
msg := fmt.Sprintf("%d success", successThreshold)
283+
expectContinue(t, w, w.doProbe(ctx), msg)
284+
expectResult(t, w, results.Success, msg)
285+
expectResultRun(t, w, 0, msg)
286+
}
287+
}
288+
289+
func TestStartupProbeFailureThreshold(t *testing.T) {
290+
ctx := context.Background()
291+
m := newTestManager()
292+
successThreshold := 1
293+
failureThreshold := 3
294+
w := newTestWorker(m, startup, v1.Probe{SuccessThreshold: int32(successThreshold), FailureThreshold: int32(failureThreshold)})
295+
m.statusManager.SetPodStatus(w.pod, getTestNotRunningStatus())
296+
297+
m.prober.exec = fakeExecProber{probe.Failure, nil}
298+
299+
for i := 0; i < failureThreshold+1; i++ {
300+
msg := fmt.Sprintf("%d failure", i+1)
301+
expectContinue(t, w, w.doProbe(ctx), msg)
302+
if i < failureThreshold-1 {
303+
expectResult(t, w, results.Unknown, msg)
304+
expectResultRun(t, w, i+1, msg)
305+
} else {
306+
msg := fmt.Sprintf("%d failure", failureThreshold)
307+
expectResult(t, w, results.Failure, msg)
308+
expectResultRun(t, w, 0, msg)
309+
}
310+
}
311+
}
312+
271313
func TestCleanUp(t *testing.T) {
272314
m := newTestManager()
273315

@@ -315,6 +357,13 @@ func expectResult(t *testing.T, w *worker, expectedResult results.Result, msg st
315357
}
316358
}
317359

360+
func expectResultRun(t *testing.T, w *worker, expectedResultRun int, msg string) {
361+
if w.resultRun != expectedResultRun {
362+
t.Errorf("[%s - %s] Expected result to be %v, but was %v",
363+
w.probeType, msg, expectedResultRun, w.resultRun)
364+
}
365+
}
366+
318367
func expectContinue(t *testing.T, w *worker, c bool, msg string) {
319368
if !c {
320369
t.Errorf("[%s - %s] Expected to continue, but did not", w.probeType, msg)
@@ -366,8 +415,10 @@ func TestOnHoldOnLivenessOrStartupCheckFailure(t *testing.T) {
366415
msg = "hold lifted"
367416
expectContinue(t, w, w.doProbe(ctx), msg)
368417
expectResult(t, w, results.Success, msg)
369-
if w.onHold {
418+
if probeType == liveness && w.onHold {
370419
t.Errorf("Prober should not be on hold anymore")
420+
} else if probeType == startup && !w.onHold {
421+
t.Errorf("Prober should be on hold due to %s check success", probeType)
371422
}
372423
}
373424
}

0 commit comments

Comments
 (0)