Skip to content

Commit 9706c85

Browse files
committed
Update healthcheck orchestration logic
Signed-off-by: Arjun Raja Yogidas <[email protected]>
1 parent 3cdfccb commit 9706c85

18 files changed

+189
-273
lines changed

cmd/nerdctl/container/container_create.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,6 @@ func createOptions(cmd *cobra.Command) (types.ContainerCreateOptions, error) {
279279
if err != nil {
280280
return opt, err
281281
}
282-
opt.HealthStartInterval, err = cmd.Flags().GetDuration("health-start-interval")
283-
if err != nil {
284-
return opt, err
285-
}
286282
opt.NoHealthcheck, err = cmd.Flags().GetBool("no-healthcheck")
287283
if err != nil {
288284
return opt, err

cmd/nerdctl/container/container_health_check_test.go renamed to cmd/nerdctl/container/container_health_check_linux_test.go

Lines changed: 112 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package container
1919
import (
2020
"encoding/json"
2121
"errors"
22-
"fmt"
2322
"strings"
2423
"testing"
2524
"time"
@@ -391,43 +390,6 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
391390
}
392391
},
393392
},
394-
{
395-
Description: "Healthcheck emits large output repeatedly",
396-
Setup: func(data test.Data, helpers test.Helpers) {
397-
helpers.Ensure("run", "-d", "--name", data.Identifier(),
398-
"--health-cmd", "yes X | head -c 60000",
399-
"--health-interval", "1s", "--health-timeout", "2s",
400-
testutil.CommonImage, "sleep", nerdtest.Infinity)
401-
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
402-
},
403-
Cleanup: func(data test.Data, helpers test.Helpers) {
404-
helpers.Anyhow("rm", "-f", data.Identifier())
405-
},
406-
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
407-
for i := 0; i < 3; i++ {
408-
helpers.Ensure("container", "healthcheck", data.Identifier())
409-
time.Sleep(2 * time.Second)
410-
}
411-
return helpers.Command("inspect", data.Identifier())
412-
},
413-
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
414-
return &test.Expected{
415-
ExitCode: 0,
416-
Output: expect.All(func(_ string, t tig.T) {
417-
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
418-
h := inspect.State.Health
419-
debug, _ := json.MarshalIndent(h, "", " ")
420-
t.Log(string(debug))
421-
assert.Assert(t, h != nil, "expected health state")
422-
assert.Equal(t, h.Status, healthcheck.Healthy)
423-
assert.Assert(t, len(h.Log) >= 3, "expected at least 3 health log entries")
424-
for _, log := range h.Log {
425-
assert.Assert(t, len(log.Output) >= 1024, fmt.Sprintf("each output should be >= 1024 bytes, was: %s", log.Output))
426-
}
427-
}),
428-
}
429-
},
430-
},
431393
{
432394
Description: "Health log in inspect keeps only the latest 5 entries",
433395
Setup: func(data test.Data, helpers test.Helpers) {
@@ -603,121 +565,121 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
603565
testCase.Run(t)
604566
}
605567

606-
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
607-
testCase := nerdtest.Setup()
608-
testCase.Require = require.Not(nerdtest.Docker)
568+
// func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
569+
// testCase := nerdtest.Setup()
570+
// testCase.Require = require.Not(nerdtest.Docker)
609571

610-
testCase.SubTests = []*test.Case{
611-
//{
612-
// Description: "Basic healthy container with systemd-triggered healthcheck",
613-
// Setup: func(data test.Data, helpers test.Helpers) {
614-
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
615-
// "--health-cmd", "echo healthy",
616-
// "--health-interval", "2s",
617-
// testutil.CommonImage, "sleep", "30")
618-
// // Wait for a couple of healthchecks to execute
619-
// time.Sleep(5 * time.Second)
620-
// },
621-
// Cleanup: func(data test.Data, helpers test.Helpers) {
622-
// helpers.Anyhow("rm", "-f", data.Identifier())
623-
// },
624-
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
625-
// return &test.Expected{
626-
// ExitCode: 0,
627-
// Output: expect.All(func(stdout, _ string, t *testing.T) {
628-
// inspect := nerdtest.InspectContainer(helpers, data.Identifier())
629-
// h := inspect.State.Health
630-
// assert.Assert(t, h != nil, "expected health state to be present")
631-
// assert.Equal(t, h.Status, "healthy")
632-
// assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
633-
// }),
634-
// }
635-
// },
636-
//},
637-
//{
638-
// Description: "Kill stops healthcheck execution",
639-
// Setup: func(data test.Data, helpers test.Helpers) {
640-
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
641-
// "--health-cmd", "echo healthy",
642-
// "--health-interval", "1s",
643-
// testutil.CommonImage, "sleep", "30")
644-
// time.Sleep(5 * time.Second) // Wait for at least one health check to execute
645-
// helpers.Ensure("kill", data.Identifier()) // Kill the container
646-
// time.Sleep(3 * time.Second) // Wait to allow any potential extra healthchecks (shouldn't happen)
647-
// },
648-
// Cleanup: func(data test.Data, helpers test.Helpers) {
649-
// helpers.Anyhow("rm", "-f", data.Identifier())
650-
// },
651-
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
652-
// return &test.Expected{
653-
// ExitCode: 0,
654-
// Output: expect.All(func(stdout, _ string, t *testing.T) {
655-
// inspect := nerdtest.InspectContainer(helpers, data.Identifier())
656-
// h := inspect.State.Health
657-
// assert.Assert(t, h != nil, "expected health state to be present")
658-
// assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
659-
//
660-
// // Get container FinishedAt timestamp
661-
// containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
662-
// assert.NilError(t, err, "parsing container FinishedAt")
663-
//
664-
// // Assert all healthcheck log start times are before container finished
665-
// for _, entry := range h.Log {
666-
// assert.NilError(t, err, "parsing healthcheck Start time")
667-
// assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
668-
// }
669-
// }),
670-
// }
671-
// },
672-
//},
572+
// testCase.SubTests = []*test.Case{
573+
// //{
574+
// // Description: "Basic healthy container with systemd-triggered healthcheck",
575+
// // Setup: func(data test.Data, helpers test.Helpers) {
576+
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
577+
// // "--health-cmd", "echo healthy",
578+
// // "--health-interval", "2s",
579+
// // testutil.CommonImage, "sleep", "30")
580+
// // // Wait for a couple of healthchecks to execute
581+
// // time.Sleep(5 * time.Second)
582+
// // },
583+
// // Cleanup: func(data test.Data, helpers test.Helpers) {
584+
// // helpers.Anyhow("rm", "-f", data.Identifier())
585+
// // },
586+
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
587+
// // return &test.Expected{
588+
// // ExitCode: 0,
589+
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
590+
// // inspect := nerdtest.InspectContainer(helpers, data.Identifier())
591+
// // h := inspect.State.Health
592+
// // assert.Assert(t, h != nil, "expected health state to be present")
593+
// // assert.Equal(t, h.Status, "healthy")
594+
// // assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
595+
// // }),
596+
// // }
597+
// // },
598+
// //},
599+
// //{
600+
// // Description: "Kill stops healthcheck execution",
601+
// // Setup: func(data test.Data, helpers test.Helpers) {
602+
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
603+
// // "--health-cmd", "echo healthy",
604+
// // "--health-interval", "1s",
605+
// // testutil.CommonImage, "sleep", "30")
606+
// // time.Sleep(5 * time.Second) // Wait for at least one health check to execute
607+
// // helpers.Ensure("kill", data.Identifier()) // Kill the container
608+
// // time.Sleep(3 * time.Second) // Wait to allow any potential extra healthchecks (shouldn't happen)
609+
// // },
610+
// // Cleanup: func(data test.Data, helpers test.Helpers) {
611+
// // helpers.Anyhow("rm", "-f", data.Identifier())
612+
// // },
613+
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
614+
// // return &test.Expected{
615+
// // ExitCode: 0,
616+
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
617+
// // inspect := nerdtest.InspectContainer(helpers, data.Identifier())
618+
// // h := inspect.State.Health
619+
// // assert.Assert(t, h != nil, "expected health state to be present")
620+
// // assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
621+
// //
622+
// // // Get container FinishedAt timestamp
623+
// // containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
624+
// // assert.NilError(t, err, "parsing container FinishedAt")
625+
// //
626+
// // // Assert all healthcheck log start times are before container finished
627+
// // for _, entry := range h.Log {
628+
// // assert.NilError(t, err, "parsing healthcheck Start time")
629+
// // assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
630+
// // }
631+
// // }),
632+
// // }
633+
// // },
634+
// //},
673635

674-
// {
675-
// Description: "Pause/unpause halts and resumes healthcheck execution",
676-
// Setup: func(data test.Data, helpers test.Helpers) {
677-
// data.Labels().Set("cID", data.Identifier())
678-
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
679-
// "--health-cmd", "echo healthy",
680-
// "--health-interval", "1s",
681-
// testutil.CommonImage, "sleep", "30")
682-
// time.Sleep(4 * time.Second)
636+
// // {
637+
// // Description: "Pause/unpause halts and resumes healthcheck execution",
638+
// // Setup: func(data test.Data, helpers test.Helpers) {
639+
// // data.Labels().Set("cID", data.Identifier())
640+
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
641+
// // "--health-cmd", "echo healthy",
642+
// // "--health-interval", "1s",
643+
// // testutil.CommonImage, "sleep", "30")
644+
// // time.Sleep(4 * time.Second)
683645

684-
// // Inspect using raw command
685-
// helpers.Command("container", "inspect", data.Labels().Get("cID")).
686-
// Run(&test.Expected{
687-
// ExitCode: expect.ExitCodeNoCheck,
688-
// Output: func(stdout string, _ string, t *testing.T) {
689-
// var dc []dockercompat.Container
690-
// err := json.Unmarshal([]byte(stdout), &dc)
691-
// assert.NilError(t, err)
692-
// assert.Equal(t, len(dc), 1)
693-
// h := dc[0].State.Health
694-
// assert.Assert(t, h != nil, "expected health state to be present")
695-
// data.Labels().Set("healthStatus", h.Status)
696-
// data.Labels().Set("logCount", strconv.Itoa(len(h.Log)))
697-
// fmt.Printf("📋 Setup Inspect: Status=%s, LogCount=%s\n", h.Status, strconv.Itoa(len(h.Log)))
698-
// },
699-
// })
700-
// },
701-
// Cleanup: func(data test.Data, helpers test.Helpers) {
702-
// helpers.Anyhow("rm", "-f", data.Identifier())
703-
// },
704-
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
705-
// return &test.Expected{
706-
// ExitCode: 0,
707-
// Output: expect.All(func(stdout, _ string, t *testing.T) {
708-
// before := data.Labels().Get("logCountBeforePause")
709-
// after := data.Labels().Get("logCountAfterUnpause")
646+
// // // Inspect using raw command
647+
// // helpers.Command("container", "inspect", data.Labels().Get("cID")).
648+
// // Run(&test.Expected{
649+
// // ExitCode: expect.ExitCodeNoCheck,
650+
// // Output: func(stdout string, _ string, t *testing.T) {
651+
// // var dc []dockercompat.Container
652+
// // err := json.Unmarshal([]byte(stdout), &dc)
653+
// // assert.NilError(t, err)
654+
// // assert.Equal(t, len(dc), 1)
655+
// // h := dc[0].State.Health
656+
// // assert.Assert(t, h != nil, "expected health state to be present")
657+
// // data.Labels().Set("healthStatus", h.Status)
658+
// // data.Labels().Set("logCount", strconv.Itoa(len(h.Log)))
659+
// // fmt.Printf("📋 Setup Inspect: Status=%s, LogCount=%s\n", h.Status, strconv.Itoa(len(h.Log)))
660+
// // },
661+
// // })
662+
// // },
663+
// // Cleanup: func(data test.Data, helpers test.Helpers) {
664+
// // helpers.Anyhow("rm", "-f", data.Identifier())
665+
// // },
666+
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
667+
// // return &test.Expected{
668+
// // ExitCode: 0,
669+
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
670+
// // before := data.Labels().Get("logCountBeforePause")
671+
// // after := data.Labels().Get("logCountAfterUnpause")
710672

711-
// beforeCount, _ := strconv.Atoi(before)
712-
// afterCount, _ := strconv.Atoi(after)
673+
// // beforeCount, _ := strconv.Atoi(before)
674+
// // afterCount, _ := strconv.Atoi(after)
713675

714-
// assert.Assert(t, afterCount > beforeCount,
715-
// "expected more healthchecks after unpause (got %d → %d)", beforeCount, afterCount)
716-
// }),
717-
// }
718-
// },
719-
// },
720-
}
676+
// // assert.Assert(t, afterCount > beforeCount,
677+
// // "expected more healthchecks after unpause (got %d → %d)", beforeCount, afterCount)
678+
// // }),
679+
// // }
680+
// // },
681+
// // },
682+
// }
721683

722-
testCase.Run(t)
723-
}
684+
// testCase.Run(t)
685+
// }

cmd/nerdctl/container/container_run.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ package container
1919
import (
2020
"errors"
2121
"fmt"
22-
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
2322
"runtime"
2423
"strings"
2524

25+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
26+
2627
"github.com/spf13/cobra"
2728
"golang.org/x/term"
2829

@@ -241,7 +242,6 @@ func setCreateFlags(cmd *cobra.Command) {
241242
cmd.Flags().Duration("health-timeout", 0, "Maximum time to allow one check to run (default: 30s)")
242243
cmd.Flags().Int("health-retries", 0, "Consecutive failures needed to report unhealthy (default: 3)")
243244
cmd.Flags().Duration("health-start-period", 0, "Start period for the container to initialize before starting health-retries countdown")
244-
cmd.Flags().Duration("health-start-interval", 0, "Time between running the checks during the start period")
245245
cmd.Flags().Bool("no-healthcheck", false, "Disable any container-specified HEALTHCHECK")
246246

247247
// #region env flags

cmd/nerdctl/helpers/flagutil.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
5252
options.HealthInterval != 0 ||
5353
options.HealthTimeout != 0 ||
5454
options.HealthRetries != 0 ||
55-
options.HealthStartPeriod != 0 ||
56-
options.HealthStartInterval != 0
55+
options.HealthStartPeriod != 0
5756

5857
if options.NoHealthcheck {
5958
if options.HealthCmd != "" || healthFlagsSet {
@@ -74,9 +73,6 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
7473
if options.HealthStartPeriod < 0 {
7574
return fmt.Errorf("--health-start-period cannot be negative")
7675
}
77-
if options.HealthStartInterval < 0 {
78-
return fmt.Errorf("--health-start-interval cannot be negative")
79-
}
8076
return nil
8177
}
8278

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ require (
120120
github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 // indirect
121121
github.com/sasha-s/go-deadlock v0.3.5 // indirect
122122
//gomodjail:unconfined
123-
github.com/sirupsen/logrus v1.9.3 // indirect
123+
github.com/sirupsen/logrus v1.9.3
124124
github.com/smallstep/pkcs7 v0.1.1 // indirect
125125
github.com/spaolacci/murmur3 v1.1.0 // indirect
126126
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect

pkg/api/types/container_types.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,12 @@ type ContainerCreateOptions struct {
285285
ImagePullOpt ImagePullOptions
286286

287287
// Healthcheck related fields
288-
HealthCmd string
289-
HealthInterval time.Duration
290-
HealthTimeout time.Duration
291-
HealthRetries int
292-
HealthStartPeriod time.Duration
293-
HealthStartInterval time.Duration
294-
NoHealthcheck bool
288+
HealthCmd string
289+
HealthInterval time.Duration
290+
HealthTimeout time.Duration
291+
HealthRetries int
292+
HealthStartPeriod time.Duration
293+
NoHealthcheck bool
295294

296295
// UserNS name for user namespace mapping of container
297296
UserNS string

pkg/cmd/container/create.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -891,9 +891,6 @@ func withHealthcheck(options types.ContainerCreateOptions, ensuredImage *imgutil
891891
if options.HealthStartPeriod != 0 {
892892
hc.StartPeriod = options.HealthStartPeriod
893893
}
894-
if options.HealthStartInterval != 0 {
895-
hc.StartInterval = options.HealthStartInterval
896-
}
897894

898895
// If no healthcheck config is set (via CLI or image), return empty string so we skip adding to container config.
899896
if reflect.DeepEqual(hc, &healthcheck.Healthcheck{}) {

pkg/cmd/container/health_check.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ func HealthCheck(ctx context.Context, client *containerd.Client, container conta
5959
hcConfig.Interval = timeoutWithDefault(hcConfig.Interval, healthcheck.DefaultProbeInterval)
6060
hcConfig.Timeout = timeoutWithDefault(hcConfig.Timeout, healthcheck.DefaultProbeTimeout)
6161
hcConfig.StartPeriod = timeoutWithDefault(hcConfig.StartPeriod, healthcheck.DefaultStartPeriod)
62-
hcConfig.StartInterval = timeoutWithDefault(hcConfig.StartInterval, healthcheck.DefaultStartInterval)
6362
if hcConfig.Retries == 0 {
6463
hcConfig.Retries = healthcheck.DefaultProbeRetries
6564
}

0 commit comments

Comments
 (0)