Skip to content

Commit 2c4eed4

Browse files
authored
Merge pull request #23 from coderbirju/automate_health_checks_feat
Automate health checks feat
2 parents ae7c0e5 + 0e46597 commit 2c4eed4

20 files changed

+881
-58
lines changed

cmd/nerdctl/container/container_create.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,6 @@ func createOptions(cmd *cobra.Command) (types.ContainerCreateOptions, error) {
279279
if err != nil {
280280
return opt, err
281281
}
282-
opt.HealthStartInterval, err = cmd.Flags().GetDuration("health-start-interval")
283-
if err != nil {
284-
return opt, err
285-
}
286282
opt.NoHealthcheck, err = cmd.Flags().GetBool("no-healthcheck")
287283
if err != nil {
288284
return opt, err

cmd/nerdctl/container/container_health_check_test.go renamed to cmd/nerdctl/container/container_health_check_linux_test.go

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@ import (
3232
"github.com/containerd/nerdctl/mod/tigron/tig"
3333

3434
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
35+
"github.com/containerd/nerdctl/v2/pkg/rootlessutil"
3536
"github.com/containerd/nerdctl/v2/pkg/testutil"
3637
"github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest"
3738
)
3839

3940
func TestContainerHealthCheckBasic(t *testing.T) {
41+
4042
testCase := nerdtest.Setup()
4143

4244
// Docker CLI does not provide a standalone healthcheck command.
@@ -602,3 +604,320 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
602604

603605
testCase.Run(t)
604606
}
607+
608+
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
609+
testCase := nerdtest.Setup()
610+
testCase.Require = require.Not(nerdtest.Docker)
611+
// Skip systemd tests in rootless environment to bypass dbus permission issues
612+
if rootlessutil.IsRootless() {
613+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
614+
}
615+
616+
testCase.SubTests = []*test.Case{
617+
{
618+
Description: "Basic healthy container with systemd-triggered healthcheck",
619+
Setup: func(data test.Data, helpers test.Helpers) {
620+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
621+
"--health-cmd", "echo healthy",
622+
"--health-interval", "2s",
623+
testutil.CommonImage, "sleep", "30")
624+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
625+
},
626+
Cleanup: func(data test.Data, helpers test.Helpers) {
627+
// Ensure proper cleanup of systemd units
628+
helpers.Anyhow("stop", data.Identifier())
629+
helpers.Anyhow("rm", "-f", data.Identifier())
630+
},
631+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
632+
return &test.Expected{
633+
ExitCode: 0,
634+
Output: expect.All(func(stdout string, t tig.T) {
635+
var h *healthcheck.Health
636+
637+
// Poll up to 5 times for health status
638+
maxAttempts := 5
639+
var finalStatus string
640+
641+
for i := 0; i < maxAttempts; i++ {
642+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
643+
h = inspect.State.Health
644+
645+
assert.Assert(t, h != nil, "expected health state to be present")
646+
finalStatus = h.Status
647+
648+
// If healthy, break and pass the test
649+
if finalStatus == "healthy" {
650+
t.Log(fmt.Sprintf("Container became healthy on attempt %d/%d", i+1, maxAttempts))
651+
break
652+
}
653+
654+
// If unhealthy, fail immediately
655+
if finalStatus == "unhealthy" {
656+
assert.Assert(t, false, fmt.Sprintf("Container became unhealthy on attempt %d/%d, status: %s", i+1, maxAttempts, finalStatus))
657+
return
658+
}
659+
660+
// If not the last attempt, wait before retrying
661+
if i < maxAttempts-1 {
662+
t.Log(fmt.Sprintf("Attempt %d/%d: status is '%s', waiting 1 second before retry", i+1, maxAttempts, finalStatus))
663+
time.Sleep(1 * time.Second)
664+
}
665+
}
666+
667+
if finalStatus != "healthy" {
668+
assert.Assert(t, false, fmt.Sprintf("Container did not become healthy after %d attempts, final status: %s", maxAttempts, finalStatus))
669+
return
670+
}
671+
672+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
673+
}),
674+
}
675+
},
676+
},
677+
{
678+
Description: "Kill stops healthcheck execution and cleans up systemd timer",
679+
Setup: func(data test.Data, helpers test.Helpers) {
680+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
681+
"--health-cmd", "echo healthy",
682+
"--health-interval", "1s",
683+
testutil.CommonImage, "sleep", "30")
684+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
685+
helpers.Ensure("kill", data.Identifier())
686+
},
687+
Cleanup: func(data test.Data, helpers test.Helpers) {
688+
// Container is already killed, just remove it
689+
helpers.Anyhow("rm", "-f", data.Identifier())
690+
},
691+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
692+
return &test.Expected{
693+
ExitCode: expect.ExitCodeNoCheck,
694+
Output: func(stdout string, t tig.T) {
695+
// Get container info for verification
696+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
697+
containerID := inspect.ID
698+
h := inspect.State.Health
699+
700+
// Verify health state and logs exist
701+
assert.Assert(t, h != nil, "expected health state to be present")
702+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
703+
704+
// Get container FinishedAt timestamp
705+
containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
706+
assert.NilError(t, err, "parsing container FinishedAt")
707+
708+
// Assert all healthcheck log start times are before container finished
709+
for _, entry := range h.Log {
710+
assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
711+
}
712+
713+
// Ensure systemd timers are removed
714+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
715+
result.Run(&test.Expected{
716+
ExitCode: expect.ExitCodeNoCheck,
717+
Output: func(stdout string, _ tig.T) {
718+
assert.Assert(t, !strings.Contains(stdout, containerID),
719+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
720+
},
721+
})
722+
},
723+
}
724+
},
725+
},
726+
{
727+
Description: "Remove cleans up systemd timer",
728+
Setup: func(data test.Data, helpers test.Helpers) {
729+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
730+
"--health-cmd", "echo healthy",
731+
"--health-interval", "1s",
732+
testutil.CommonImage, "sleep", "30")
733+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
734+
helpers.Ensure("rm", "-f", data.Identifier())
735+
},
736+
Cleanup: func(data test.Data, helpers test.Helpers) {
737+
// Container is already removed, no cleanup needed
738+
helpers.Anyhow("rm", "-f", data.Identifier())
739+
},
740+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
741+
return &test.Expected{
742+
ExitCode: expect.ExitCodeNoCheck,
743+
Output: func(stdout string, t tig.T) {
744+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
745+
containerID := inspect.ID
746+
747+
// Check systemd timers to ensure cleanup
748+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
749+
result.Run(&test.Expected{
750+
ExitCode: expect.ExitCodeNoCheck,
751+
Output: func(stdout string, _ tig.T) {
752+
// Verify systemd timer has been cleaned up by checking systemctl output
753+
// We check that no timer contains our test identifier
754+
assert.Assert(t, !strings.Contains(stdout, containerID),
755+
"expected nerdctl healthcheck timer for container ID %s to be removed after container removal", containerID)
756+
},
757+
})
758+
},
759+
}
760+
},
761+
},
762+
{
763+
Description: "Stop cleans up systemd timer",
764+
Setup: func(data test.Data, helpers test.Helpers) {
765+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
766+
"--health-cmd", "echo healthy",
767+
"--health-interval", "1s",
768+
testutil.CommonImage, "sleep", "30")
769+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
770+
helpers.Ensure("stop", data.Identifier())
771+
},
772+
Cleanup: func(data test.Data, helpers test.Helpers) {
773+
// Container is already stopped, just remove it
774+
helpers.Anyhow("rm", "-f", data.Identifier())
775+
},
776+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
777+
return &test.Expected{
778+
ExitCode: expect.ExitCodeNoCheck,
779+
Output: func(stdout string, t tig.T) {
780+
// Get container info for verification
781+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
782+
containerID := inspect.ID
783+
784+
// Ensure systemd timers are removed
785+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
786+
result.Run(&test.Expected{
787+
ExitCode: expect.ExitCodeNoCheck,
788+
Output: func(stdout string, _ tig.T) {
789+
assert.Assert(t, !strings.Contains(stdout, containerID),
790+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
791+
},
792+
})
793+
},
794+
}
795+
},
796+
},
797+
}
798+
testCase.Run(t)
799+
}
800+
801+
func TestHealthCheck_SystemdIntegration_Advanced(t *testing.T) {
802+
803+
testCase := nerdtest.Setup()
804+
testCase.Require = require.Not(nerdtest.Docker)
805+
// Skip systemd tests in rootless environment to bypass dbus permission issues
806+
if rootlessutil.IsRootless() {
807+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
808+
}
809+
810+
testCase.SubTests = []*test.Case{
811+
{
812+
// Tests that CreateTimer() successfully creates systemd timer units and
813+
// RemoveTransientHealthCheckFiles() properly cleans up units when container stops.
814+
Description: "Systemd timer unit creation and cleanup",
815+
Setup: func(data test.Data, helpers test.Helpers) {
816+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
817+
"--health-cmd", "echo healthy",
818+
"--health-interval", "1s",
819+
testutil.CommonImage, "sleep", "30")
820+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
821+
},
822+
Cleanup: func(data test.Data, helpers test.Helpers) {
823+
helpers.Anyhow("rm", "-f", data.Identifier())
824+
},
825+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
826+
return helpers.Command("inspect", data.Identifier())
827+
},
828+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
829+
return &test.Expected{
830+
ExitCode: 0,
831+
Output: expect.All(func(stdout string, t tig.T) {
832+
// Get container ID and check systemd timer
833+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
834+
containerID := containerInspect.ID
835+
836+
// Check systemd timer
837+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
838+
result.Run(&test.Expected{
839+
ExitCode: expect.ExitCodeNoCheck,
840+
Output: func(stdout string, _ tig.T) {
841+
// Verify that a timer exists for this specific container
842+
assert.Assert(t, strings.Contains(stdout, containerID),
843+
"expected to find nerdctl healthcheck timer containing container ID: %s", containerID)
844+
},
845+
})
846+
// Stop container and verify cleanup
847+
helpers.Ensure("stop", data.Identifier())
848+
849+
// Check that timer is gone
850+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
851+
result.Run(&test.Expected{
852+
ExitCode: expect.ExitCodeNoCheck,
853+
Output: func(stdout string, _ tig.T) {
854+
assert.Assert(t, !strings.Contains(stdout, containerID),
855+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
856+
},
857+
})
858+
}),
859+
}
860+
},
861+
},
862+
{
863+
Description: "Container restart recreates systemd timer",
864+
Setup: func(data test.Data, helpers test.Helpers) {
865+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
866+
"--health-cmd", "echo restart-test",
867+
"--health-interval", "2s",
868+
testutil.CommonImage, "sleep", "60")
869+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
870+
},
871+
Cleanup: func(data test.Data, helpers test.Helpers) {
872+
helpers.Anyhow("rm", "-f", data.Identifier())
873+
},
874+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
875+
// Get container ID for verification
876+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
877+
containerID := containerInspect.ID
878+
879+
// Step 1: Verify timer exists initially
880+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
881+
result.Run(&test.Expected{
882+
ExitCode: expect.ExitCodeNoCheck,
883+
Output: func(stdout string, t tig.T) {
884+
assert.Assert(t, strings.Contains(stdout, containerID),
885+
"expected timer for container %s to exist initially", containerID)
886+
},
887+
})
888+
889+
// Step 2: Stop container
890+
helpers.Ensure("stop", data.Identifier())
891+
892+
// Step 3: Verify timer is removed after stop
893+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
894+
result.Run(&test.Expected{
895+
ExitCode: expect.ExitCodeNoCheck,
896+
Output: func(stdout string, t tig.T) {
897+
assert.Assert(t, !strings.Contains(stdout, containerID),
898+
"expected timer for container %s to be removed after stop", containerID)
899+
},
900+
})
901+
902+
// Step 4: Restart container
903+
helpers.Ensure("start", data.Identifier())
904+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
905+
906+
// Step 5: Verify timer is recreated after restart - this is our final verification
907+
return helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
908+
},
909+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
910+
return &test.Expected{
911+
ExitCode: expect.ExitCodeNoCheck,
912+
Output: func(stdout string, t tig.T) {
913+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
914+
containerID := containerInspect.ID
915+
assert.Assert(t, strings.Contains(stdout, containerID),
916+
"expected timer for container %s to be recreated after restart", containerID)
917+
},
918+
}
919+
},
920+
},
921+
}
922+
testCase.Run(t)
923+
}

cmd/nerdctl/container/container_run.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
"github.com/containerd/nerdctl/v2/pkg/containerutil"
3838
"github.com/containerd/nerdctl/v2/pkg/defaults"
3939
"github.com/containerd/nerdctl/v2/pkg/errutil"
40+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
4041
"github.com/containerd/nerdctl/v2/pkg/labels"
4142
"github.com/containerd/nerdctl/v2/pkg/logging"
4243
"github.com/containerd/nerdctl/v2/pkg/netutil"
@@ -240,7 +241,6 @@ func setCreateFlags(cmd *cobra.Command) {
240241
cmd.Flags().Duration("health-timeout", 0, "Maximum time to allow one check to run (default: 30s)")
241242
cmd.Flags().Int("health-retries", 0, "Consecutive failures needed to report unhealthy (default: 3)")
242243
cmd.Flags().Duration("health-start-period", 0, "Start period for the container to initialize before starting health-retries countdown")
243-
cmd.Flags().Duration("health-start-interval", 0, "Time between running the checks during the start period")
244244
cmd.Flags().Bool("no-healthcheck", false, "Disable any container-specified HEALTHCHECK")
245245

246246
// #region env flags
@@ -445,6 +445,14 @@ func runAction(cmd *cobra.Command, args []string) error {
445445
return err
446446
}
447447

448+
// Setup container healthchecks.
449+
if err := healthcheck.CreateTimer(ctx, c); err != nil {
450+
return fmt.Errorf("failed to create healthcheck timer: %w", err)
451+
}
452+
if err := healthcheck.StartTimer(ctx, c); err != nil {
453+
return fmt.Errorf("failed to start healthcheck timer: %w", err)
454+
}
455+
448456
if createOpt.Detach {
449457
fmt.Fprintln(createOpt.Stdout, id)
450458
return nil

cmd/nerdctl/container/container_run_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,9 @@ func TestRunDomainname(t *testing.T) {
841841
}
842842

843843
func TestRunHealthcheckFlags(t *testing.T) {
844+
if rootlessutil.IsRootless() {
845+
t.Skip("healthcheck tests are skipped in rootless environment")
846+
}
844847
testCase := nerdtest.Setup()
845848

846849
testCases := []struct {
@@ -990,6 +993,9 @@ func TestRunHealthcheckFlags(t *testing.T) {
990993
}
991994

992995
func TestRunHealthcheckFromImage(t *testing.T) {
996+
if rootlessutil.IsRootless() {
997+
t.Skip("healthcheck tests are skipped in rootless environment")
998+
}
993999
nerdtest.Setup()
9941000

9951001
dockerfile := fmt.Sprintf(`FROM %s

0 commit comments

Comments
 (0)