Skip to content

Commit 5fb6d2e

Browse files
committed
add healthcheck orchestration logic
Signed-off-by: Arjun Raja Yogidas <[email protected]>
1 parent 832c455 commit 5fb6d2e

20 files changed

+663
-79
lines changed

cmd/nerdctl/container/container_create.go

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -279,10 +279,6 @@ func createOptions(cmd *cobra.Command) (types.ContainerCreateOptions, error) {
279279
if err != nil {
280280
return opt, err
281281
}
282-
opt.HealthStartInterval, err = cmd.Flags().GetDuration("health-start-interval")
283-
if err != nil {
284-
return opt, err
285-
}
286282
opt.NoHealthcheck, err = cmd.Flags().GetBool("no-healthcheck")
287283
if err != nil {
288284
return opt, err

cmd/nerdctl/container/container_health_check_test.go renamed to cmd/nerdctl/container/container_health_check_linux_test.go

Lines changed: 215 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ package container
1919
import (
2020
"encoding/json"
2121
"errors"
22-
"fmt"
2322
"strings"
2423
"testing"
2524
"time"
@@ -32,11 +31,16 @@ import (
3231
"github.com/containerd/nerdctl/mod/tigron/tig"
3332

3433
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
34+
"github.com/containerd/nerdctl/v2/pkg/rootlessutil"
3535
"github.com/containerd/nerdctl/v2/pkg/testutil"
3636
"github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest"
3737
)
3838

3939
func TestContainerHealthCheckBasic(t *testing.T) {
40+
if rootlessutil.IsRootless() {
41+
t.Skip("healthcheck tests are skipped in rootless environment")
42+
}
43+
4044
testCase := nerdtest.Setup()
4145

4246
// Docker CLI does not provide a standalone healthcheck command.
@@ -134,6 +138,10 @@ func TestContainerHealthCheckBasic(t *testing.T) {
134138
}
135139

136140
func TestContainerHealthCheckAdvance(t *testing.T) {
141+
if rootlessutil.IsRootless() {
142+
t.Skip("healthcheck tests are skipped in rootless environment")
143+
}
144+
137145
testCase := nerdtest.Setup()
138146

139147
// Docker CLI does not provide a standalone healthcheck command.
@@ -391,43 +399,6 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
391399
}
392400
},
393401
},
394-
{
395-
Description: "Healthcheck emits large output repeatedly",
396-
Setup: func(data test.Data, helpers test.Helpers) {
397-
helpers.Ensure("run", "-d", "--name", data.Identifier(),
398-
"--health-cmd", "yes X | head -c 60000",
399-
"--health-interval", "1s", "--health-timeout", "2s",
400-
testutil.CommonImage, "sleep", nerdtest.Infinity)
401-
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
402-
},
403-
Cleanup: func(data test.Data, helpers test.Helpers) {
404-
helpers.Anyhow("rm", "-f", data.Identifier())
405-
},
406-
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
407-
for i := 0; i < 3; i++ {
408-
helpers.Ensure("container", "healthcheck", data.Identifier())
409-
time.Sleep(2 * time.Second)
410-
}
411-
return helpers.Command("inspect", data.Identifier())
412-
},
413-
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
414-
return &test.Expected{
415-
ExitCode: 0,
416-
Output: expect.All(func(_ string, t tig.T) {
417-
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
418-
h := inspect.State.Health
419-
debug, _ := json.MarshalIndent(h, "", " ")
420-
t.Log(string(debug))
421-
assert.Assert(t, h != nil, "expected health state")
422-
assert.Equal(t, h.Status, healthcheck.Healthy)
423-
assert.Assert(t, len(h.Log) >= 3, "expected at least 3 health log entries")
424-
for _, log := range h.Log {
425-
assert.Assert(t, len(log.Output) >= 1024, fmt.Sprintf("each output should be >= 1024 bytes, was: %s", log.Output))
426-
}
427-
}),
428-
}
429-
},
430-
},
431402
{
432403
Description: "Health log in inspect keeps only the latest 5 entries",
433404
Setup: func(data test.Data, helpers test.Helpers) {
@@ -602,3 +573,209 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
602573

603574
testCase.Run(t)
604575
}
576+
577+
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
578+
testCase := nerdtest.Setup()
579+
testCase.Require = require.Not(nerdtest.Docker)
580+
581+
testCase.SubTests = []*test.Case{
582+
{
583+
Description: "Basic healthy container with systemd-triggered healthcheck",
584+
Setup: func(data test.Data, helpers test.Helpers) {
585+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
586+
"--health-cmd", "echo healthy",
587+
"--health-interval", "2s",
588+
testutil.CommonImage, "sleep", "30")
589+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
590+
// Wait for a healthcheck to execute
591+
time.Sleep(2 * time.Second)
592+
},
593+
Cleanup: func(data test.Data, helpers test.Helpers) {
594+
// Ensure proper cleanup of systemd units
595+
helpers.Anyhow("stop", data.Identifier())
596+
time.Sleep(500 * time.Millisecond) // Allow systemd cleanup
597+
helpers.Anyhow("rm", "-f", data.Identifier())
598+
},
599+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
600+
return &test.Expected{
601+
ExitCode: 0,
602+
Output: expect.All(func(stdout string, t tig.T) {
603+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
604+
h := inspect.State.Health
605+
assert.Assert(t, h != nil, "expected health state to be present")
606+
assert.Equal(t, h.Status, "healthy")
607+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
608+
}),
609+
}
610+
},
611+
},
612+
{
613+
Description: "Kill stops healthcheck execution",
614+
Setup: func(data test.Data, helpers test.Helpers) {
615+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
616+
"--health-cmd", "echo healthy",
617+
"--health-interval", "1s",
618+
testutil.CommonImage, "sleep", "30")
619+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
620+
time.Sleep(2 * time.Second) // Wait for at least one health check to execute
621+
helpers.Ensure("kill", data.Identifier()) // Kill the container
622+
time.Sleep(3 * time.Second) // Wait to allow any potential extra healthchecks (shouldn't happen)
623+
},
624+
Cleanup: func(data test.Data, helpers test.Helpers) {
625+
// Container is already killed, just remove it
626+
helpers.Anyhow("rm", "-f", data.Identifier())
627+
},
628+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
629+
return &test.Expected{
630+
ExitCode: 0,
631+
Output: expect.All(func(stdout string, t tig.T) {
632+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
633+
h := inspect.State.Health
634+
assert.Assert(t, h != nil, "expected health state to be present")
635+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
636+
637+
// Get container FinishedAt timestamp
638+
containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
639+
assert.NilError(t, err, "parsing container FinishedAt")
640+
641+
// Assert all healthcheck log start times are before container finished
642+
for _, entry := range h.Log {
643+
assert.NilError(t, err, "parsing healthcheck Start time")
644+
assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
645+
}
646+
}),
647+
}
648+
},
649+
},
650+
}
651+
testCase.Run(t)
652+
}
653+
654+
func TestHealthCheck_SystemdIntegration_Advanced(t *testing.T) {
655+
if rootlessutil.IsRootless() {
656+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
657+
}
658+
testCase := nerdtest.Setup()
659+
testCase.Require = require.Not(nerdtest.Docker)
660+
661+
testCase.SubTests = []*test.Case{
662+
{
663+
// Tests that CreateTimer() successfully creates systemd timer units and
664+
// RemoveTransientHealthCheckFiles() properly cleans up units when container stops.
665+
Description: "Systemd timer unit creation and cleanup",
666+
Setup: func(data test.Data, helpers test.Helpers) {
667+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
668+
"--health-cmd", "echo healthy",
669+
"--health-interval", "1s",
670+
testutil.CommonImage, "sleep", "30")
671+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
672+
// Wait longer for systemd timer creation and first healthcheck execution
673+
time.Sleep(3 * time.Second)
674+
},
675+
Cleanup: func(data test.Data, helpers test.Helpers) {
676+
helpers.Anyhow("rm", "-f", data.Identifier())
677+
},
678+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
679+
return helpers.Command("inspect", data.Identifier())
680+
},
681+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
682+
return &test.Expected{
683+
ExitCode: 0,
684+
Output: expect.All(func(stdout string, t tig.T) {
685+
// Get container ID and check systemd timer
686+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
687+
containerID := containerInspect.ID
688+
689+
// Check systemd timer
690+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
691+
result.Run(&test.Expected{
692+
ExitCode: expect.ExitCodeNoCheck,
693+
Output: func(stdout string, _ tig.T) {
694+
// Verify that a timer exists for this specific container
695+
assert.Assert(t, strings.Contains(stdout, containerID),
696+
"expected to find nerdctl healthcheck timer containing container ID: %s", containerID)
697+
},
698+
})
699+
// Stop container and verify cleanup
700+
helpers.Ensure("stop", data.Identifier())
701+
time.Sleep(500 * time.Millisecond) // Allow cleanup to complete
702+
703+
// Check that timer is gone
704+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
705+
result.Run(&test.Expected{
706+
ExitCode: expect.ExitCodeNoCheck,
707+
Output: func(stdout string, _ tig.T) {
708+
assert.Assert(t, !strings.Contains(stdout, containerID),
709+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
710+
711+
},
712+
})
713+
}),
714+
}
715+
},
716+
},
717+
{
718+
Description: "Container restart recreates systemd timer",
719+
Setup: func(data test.Data, helpers test.Helpers) {
720+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
721+
"--health-cmd", "echo restart-test",
722+
"--health-interval", "2s",
723+
testutil.CommonImage, "sleep", "60")
724+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
725+
time.Sleep(3 * time.Second) // Wait for initial timer creation
726+
},
727+
Cleanup: func(data test.Data, helpers test.Helpers) {
728+
helpers.Anyhow("rm", "-f", data.Identifier())
729+
},
730+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
731+
// Get container ID for verification
732+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
733+
containerID := containerInspect.ID
734+
735+
// Step 1: Verify timer exists initially
736+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
737+
result.Run(&test.Expected{
738+
ExitCode: expect.ExitCodeNoCheck,
739+
Output: func(stdout string, t tig.T) {
740+
assert.Assert(t, strings.Contains(stdout, containerID),
741+
"expected timer for container %s to exist initially", containerID)
742+
},
743+
})
744+
745+
// Step 2: Stop container
746+
helpers.Ensure("stop", data.Identifier())
747+
time.Sleep(1 * time.Second) // Allow cleanup
748+
749+
// Step 3: Verify timer is removed after stop
750+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
751+
result.Run(&test.Expected{
752+
ExitCode: expect.ExitCodeNoCheck,
753+
Output: func(stdout string, t tig.T) {
754+
assert.Assert(t, !strings.Contains(stdout, containerID),
755+
"expected timer for container %s to be removed after stop", containerID)
756+
},
757+
})
758+
759+
// Step 4: Restart container
760+
helpers.Ensure("start", data.Identifier())
761+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
762+
time.Sleep(3 * time.Second) // Wait for timer recreation
763+
764+
// Step 5: Verify timer is recreated after restart - this is our final verification
765+
return helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
766+
},
767+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
768+
return &test.Expected{
769+
ExitCode: expect.ExitCodeNoCheck,
770+
Output: func(stdout string, t tig.T) {
771+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
772+
containerID := containerInspect.ID
773+
assert.Assert(t, strings.Contains(stdout, containerID),
774+
"expected timer for container %s to be recreated after restart", containerID)
775+
},
776+
}
777+
},
778+
},
779+
}
780+
testCase.Run(t)
781+
}

cmd/nerdctl/container/container_run.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ import (
3737
"github.com/containerd/nerdctl/v2/pkg/containerutil"
3838
"github.com/containerd/nerdctl/v2/pkg/defaults"
3939
"github.com/containerd/nerdctl/v2/pkg/errutil"
40+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
4041
"github.com/containerd/nerdctl/v2/pkg/labels"
4142
"github.com/containerd/nerdctl/v2/pkg/logging"
4243
"github.com/containerd/nerdctl/v2/pkg/netutil"
@@ -240,7 +241,6 @@ func setCreateFlags(cmd *cobra.Command) {
240241
cmd.Flags().Duration("health-timeout", 0, "Maximum time to allow one check to run (default: 30s)")
241242
cmd.Flags().Int("health-retries", 0, "Consecutive failures needed to report unhealthy (default: 3)")
242243
cmd.Flags().Duration("health-start-period", 0, "Start period for the container to initialize before starting health-retries countdown")
243-
cmd.Flags().Duration("health-start-interval", 0, "Time between running the checks during the start period")
244244
cmd.Flags().Bool("no-healthcheck", false, "Disable any container-specified HEALTHCHECK")
245245

246246
// #region env flags
@@ -439,6 +439,14 @@ func runAction(cmd *cobra.Command, args []string) error {
439439
return err
440440
}
441441

442+
// Setup container healthchecks.
443+
if err := healthcheck.CreateTimer(ctx, c); err != nil {
444+
return fmt.Errorf("failed to create healthcheck timer: %w", err)
445+
}
446+
if err := healthcheck.StartTimer(ctx, c); err != nil {
447+
return fmt.Errorf("failed to start healthcheck timer: %w", err)
448+
}
449+
442450
if createOpt.Detach {
443451
fmt.Fprintln(createOpt.Stdout, id)
444452
return nil

cmd/nerdctl/container/container_run_test.go

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -841,6 +841,9 @@ func TestRunDomainname(t *testing.T) {
841841
}
842842

843843
func TestRunHealthcheckFlags(t *testing.T) {
844+
if rootlessutil.IsRootless() {
845+
t.Skip("healthcheck tests are skipped in rootless environment")
846+
}
844847
testCase := nerdtest.Setup()
845848

846849
testCases := []struct {
@@ -990,6 +993,9 @@ func TestRunHealthcheckFlags(t *testing.T) {
990993
}
991994

992995
func TestRunHealthcheckFromImage(t *testing.T) {
996+
if rootlessutil.IsRootless() {
997+
t.Skip("healthcheck tests are skipped in rootless environment")
998+
}
993999
nerdtest.Setup()
9941000

9951001
dockerfile := fmt.Sprintf(`FROM %s

cmd/nerdctl/helpers/flagutil.go

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,7 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
5252
options.HealthInterval != 0 ||
5353
options.HealthTimeout != 0 ||
5454
options.HealthRetries != 0 ||
55-
options.HealthStartPeriod != 0 ||
56-
options.HealthStartInterval != 0
55+
options.HealthStartPeriod != 0
5756

5857
if options.NoHealthcheck {
5958
if options.HealthCmd != "" || healthFlagsSet {
@@ -74,9 +73,6 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
7473
if options.HealthStartPeriod < 0 {
7574
return fmt.Errorf("--health-start-period cannot be negative")
7675
}
77-
if options.HealthStartInterval < 0 {
78-
return fmt.Errorf("--health-start-interval cannot be negative")
79-
}
8076
return nil
8177
}
8278

go.mod

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ require (
120120
github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 // indirect
121121
github.com/sasha-s/go-deadlock v0.3.5 // indirect
122122
//gomodjail:unconfined
123-
github.com/sirupsen/logrus v1.9.3 // indirect
123+
github.com/sirupsen/logrus v1.9.3
124124
github.com/smallstep/pkcs7 v0.1.1 // indirect
125125
github.com/spaolacci/murmur3 v1.1.0 // indirect
126126
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect

0 commit comments

Comments
 (0)