Skip to content

Commit ed6f63a

Browse files
winterqt authored and Luap99 committed
libpod: fix healthchecks not executing every interval on linux
By default, systemd limits how many times a service can be started within a given interval. This means that if a healthcheck runs more often than that limit allows, systemd will refuse to start it, and a message like "Start request repeated too quickly." is emitted to the journal.

Signed-off-by: Winter M <[email protected]>
Signed-off-by: Paul Holzinger <[email protected]>
1 parent 046206f commit ed6f63a

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

libpod/healthcheck_linux.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,8 @@ func (c *Container) createTimer(interval string, isStartup bool) error {
4040
cmd = append(cmd, "--setenv=PATH="+path)
4141
}
4242

43-
cmd = append(cmd, "--unit", hcUnitName, fmt.Sprintf("--on-unit-inactive=%s", interval), "--timer-property=AccuracySec=1s", podman)
43+
// StartLimitIntervalSec=0 so we don't hit the restart limit
44+
cmd = append(cmd, "--unit", hcUnitName, fmt.Sprintf("--on-unit-inactive=%s", interval), "--timer-property=AccuracySec=1s", "--property=StartLimitIntervalSec=0", podman)
4445

4546
if logrus.IsLevelEnabled(logrus.DebugLevel) {
4647
cmd = append(cmd, "--log-level=debug", "--syslog")

test/system/220-healthcheck.bats

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,11 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\"
104104
assert "$cidmatch" =~ " $cid-[0-9a-f]+\.timer *.*/podman healthcheck run $cid" \
105105
"Healthcheck systemd unit exists"
106106

107+
# Check that the right service option is applied so we don't hit the systemd restart limit.
108+
# Even though the code sets StartLimitIntervalSec the systemd command prints StartLimitInterval*U*Sec
109+
run -0 systemctl show "$cid-*.service"
110+
assert "$output" =~ "StartLimitIntervalUSec=0" "The hc service has the right interval set"
111+
107112
current_time=$(date --iso-8601=ns)
108113
# After three successive failures, container should no longer be healthy
109114
_check_health $ctrname "Four or more failures" "

0 commit comments

Comments
 (0)