Skip to content

Automate health checks #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions cmd/nerdctl/container/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,6 @@ func createOptions(cmd *cobra.Command) (types.ContainerCreateOptions, error) {
if err != nil {
return opt, err
}
opt.HealthStartInterval, err = cmd.Flags().GetDuration("health-start-interval")
if err != nil {
return opt, err
}
opt.NoHealthcheck, err = cmd.Flags().GetBool("no-healthcheck")
if err != nil {
return opt, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package container
import (
"encoding/json"
"errors"
"fmt"
"strings"
"testing"
"time"
Expand All @@ -32,11 +31,16 @@ import (
"github.com/containerd/nerdctl/mod/tigron/tig"

"github.com/containerd/nerdctl/v2/pkg/healthcheck"
"github.com/containerd/nerdctl/v2/pkg/rootlessutil"
"github.com/containerd/nerdctl/v2/pkg/testutil"
"github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest"
)

func TestContainerHealthCheckBasic(t *testing.T) {
if rootlessutil.IsRootless() {
t.Skip("healthcheck tests are skipped in rootless environment")
}

testCase := nerdtest.Setup()

// Docker CLI does not provide a standalone healthcheck command.
Expand Down Expand Up @@ -134,6 +138,10 @@ func TestContainerHealthCheckBasic(t *testing.T) {
}

func TestContainerHealthCheckAdvance(t *testing.T) {
if rootlessutil.IsRootless() {
t.Skip("healthcheck tests are skipped in rootless environment")
}

testCase := nerdtest.Setup()

// Docker CLI does not provide a standalone healthcheck command.
Expand Down Expand Up @@ -391,43 +399,6 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
}
},
},
{
Description: "Healthcheck emits large output repeatedly",
Setup: func(data test.Data, helpers test.Helpers) {
helpers.Ensure("run", "-d", "--name", data.Identifier(),
"--health-cmd", "yes X | head -c 60000",
"--health-interval", "1s", "--health-timeout", "2s",
testutil.CommonImage, "sleep", nerdtest.Infinity)
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
},
Cleanup: func(data test.Data, helpers test.Helpers) {
helpers.Anyhow("rm", "-f", data.Identifier())
},
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
for i := 0; i < 3; i++ {
helpers.Ensure("container", "healthcheck", data.Identifier())
time.Sleep(2 * time.Second)
}
return helpers.Command("inspect", data.Identifier())
},
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
return &test.Expected{
ExitCode: 0,
Output: expect.All(func(_ string, t tig.T) {
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
h := inspect.State.Health
debug, _ := json.MarshalIndent(h, "", " ")
t.Log(string(debug))
assert.Assert(t, h != nil, "expected health state")
assert.Equal(t, h.Status, healthcheck.Healthy)
assert.Assert(t, len(h.Log) >= 3, "expected at least 3 health log entries")
for _, log := range h.Log {
assert.Assert(t, len(log.Output) >= 1024, fmt.Sprintf("each output should be >= 1024 bytes, was: %s", log.Output))
}
}),
}
},
},
{
Description: "Health log in inspect keeps only the latest 5 entries",
Setup: func(data test.Data, helpers test.Helpers) {
Expand Down Expand Up @@ -602,3 +573,122 @@ func TestContainerHealthCheckAdvance(t *testing.T) {

testCase.Run(t)
}

// func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
// testCase := nerdtest.Setup()
// testCase.Require = require.Not(nerdtest.Docker)

// testCase.SubTests = []*test.Case{
// //{
// // Description: "Basic healthy container with systemd-triggered healthcheck",
// // Setup: func(data test.Data, helpers test.Helpers) {
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
// // "--health-cmd", "echo healthy",
// // "--health-interval", "2s",
// // testutil.CommonImage, "sleep", "30")
// // // Wait for a couple of healthchecks to execute
// // time.Sleep(5 * time.Second)
// // },
// // Cleanup: func(data test.Data, helpers test.Helpers) {
// // helpers.Anyhow("rm", "-f", data.Identifier())
// // },
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
// // return &test.Expected{
// // ExitCode: 0,
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
// // inspect := nerdtest.InspectContainer(helpers, data.Identifier())
// // h := inspect.State.Health
// // assert.Assert(t, h != nil, "expected health state to be present")
// // assert.Equal(t, h.Status, "healthy")
// // assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
// // }),
// // }
// // },
// //},
// //{
// // Description: "Kill stops healthcheck execution",
// // Setup: func(data test.Data, helpers test.Helpers) {
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
// // "--health-cmd", "echo healthy",
// // "--health-interval", "1s",
// // testutil.CommonImage, "sleep", "30")
// // time.Sleep(5 * time.Second) // Wait for at least one health check to execute
// // helpers.Ensure("kill", data.Identifier()) // Kill the container
// // time.Sleep(3 * time.Second) // Wait to allow any potential extra healthchecks (shouldn't happen)
// // },
// // Cleanup: func(data test.Data, helpers test.Helpers) {
// // helpers.Anyhow("rm", "-f", data.Identifier())
// // },
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
// // return &test.Expected{
// // ExitCode: 0,
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
// // inspect := nerdtest.InspectContainer(helpers, data.Identifier())
// // h := inspect.State.Health
// // assert.Assert(t, h != nil, "expected health state to be present")
// // assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
// //
// // // Get container FinishedAt timestamp
// // containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
// // assert.NilError(t, err, "parsing container FinishedAt")
// //
// // // Assert all healthcheck log start times are before container finished
// // for _, entry := range h.Log {
// // assert.NilError(t, err, "parsing healthcheck Start time")
// // assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
// // }
// // }),
// // }
// // },
// //},

// // {
// // Description: "Pause/unpause halts and resumes healthcheck execution",
// // Setup: func(data test.Data, helpers test.Helpers) {
// // data.Labels().Set("cID", data.Identifier())
// // helpers.Ensure("run", "-d", "--name", data.Identifier(),
// // "--health-cmd", "echo healthy",
// // "--health-interval", "1s",
// // testutil.CommonImage, "sleep", "30")
// // time.Sleep(4 * time.Second)

// // // Inspect using raw command
// // helpers.Command("container", "inspect", data.Labels().Get("cID")).
// // Run(&test.Expected{
// // ExitCode: expect.ExitCodeNoCheck,
// // Output: func(stdout string, _ string, t *testing.T) {
// // var dc []dockercompat.Container
// // err := json.Unmarshal([]byte(stdout), &dc)
// // assert.NilError(t, err)
// // assert.Equal(t, len(dc), 1)
// // h := dc[0].State.Health
// // assert.Assert(t, h != nil, "expected health state to be present")
// // data.Labels().Set("healthStatus", h.Status)
// // data.Labels().Set("logCount", strconv.Itoa(len(h.Log)))
// // fmt.Printf("📋 Setup Inspect: Status=%s, LogCount=%s\n", h.Status, strconv.Itoa(len(h.Log)))
// // },
// // })
// // },
// // Cleanup: func(data test.Data, helpers test.Helpers) {
// // helpers.Anyhow("rm", "-f", data.Identifier())
// // },
// // Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
// // return &test.Expected{
// // ExitCode: 0,
// // Output: expect.All(func(stdout, _ string, t *testing.T) {
// // before := data.Labels().Get("logCountBeforePause")
// // after := data.Labels().Get("logCountAfterUnpause")

// // beforeCount, _ := strconv.Atoi(before)
// // afterCount, _ := strconv.Atoi(after)

// // assert.Assert(t, afterCount > beforeCount,
// // "expected more healthchecks after unpause (got %d → %d)", beforeCount, afterCount)
// // }),
// // }
// // },
// // },
// }

// testCase.Run(t)
// }
16 changes: 11 additions & 5 deletions cmd/nerdctl/container/container_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,9 @@
"fmt"
"runtime"
"strings"

Check failure on line 24 in cmd/nerdctl/container/container_run.go

View workflow job for this annotation

GitHub Actions / go / windows

File is not properly formatted (gci)

Check failure on line 24 in cmd/nerdctl/container/container_run.go

View workflow job for this annotation

GitHub Actions / go / linux

File is not properly formatted (gci)

Check failure on line 24 in cmd/nerdctl/container/container_run.go

View workflow job for this annotation

GitHub Actions / go / linux (go canary)

File is not properly formatted (gci)

Check failure on line 24 in cmd/nerdctl/container/container_run.go

View workflow job for this annotation

GitHub Actions / go / freebsd

File is not properly formatted (gci)

Check failure on line 24 in cmd/nerdctl/container/container_run.go

View workflow job for this annotation

GitHub Actions / go / darwin

File is not properly formatted (gci)
"github.com/spf13/cobra"
"golang.org/x/term"

"github.com/containerd/console"
"github.com/containerd/log"

"github.com/containerd/nerdctl/v2/cmd/nerdctl/completion"
"github.com/containerd/nerdctl/v2/pkg/annotations"
"github.com/containerd/nerdctl/v2/pkg/api/types"
Expand All @@ -37,11 +33,14 @@
"github.com/containerd/nerdctl/v2/pkg/containerutil"
"github.com/containerd/nerdctl/v2/pkg/defaults"
"github.com/containerd/nerdctl/v2/pkg/errutil"
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
"github.com/containerd/nerdctl/v2/pkg/labels"
"github.com/containerd/nerdctl/v2/pkg/logging"
"github.com/containerd/nerdctl/v2/pkg/netutil"
"github.com/containerd/nerdctl/v2/pkg/signalutil"
"github.com/containerd/nerdctl/v2/pkg/taskutil"
"github.com/spf13/cobra"
"golang.org/x/term"
)

const (
Expand Down Expand Up @@ -240,7 +239,6 @@
cmd.Flags().Duration("health-timeout", 0, "Maximum time to allow one check to run (default: 30s)")
cmd.Flags().Int("health-retries", 0, "Consecutive failures needed to report unhealthy (default: 3)")
cmd.Flags().Duration("health-start-period", 0, "Start period for the container to initialize before starting health-retries countdown")
cmd.Flags().Duration("health-start-interval", 0, "Time between running the checks during the start period")
cmd.Flags().Bool("no-healthcheck", false, "Disable any container-specified HEALTHCHECK")

// #region env flags
Expand Down Expand Up @@ -439,6 +437,14 @@
return err
}

// Set up container healthchecks.
if err := healthcheck.CreateTimer(ctx, c); err != nil {
return fmt.Errorf("failed to create healthcheck timer: %w", err)
}
if err := healthcheck.StartTimer(ctx, c); err != nil {
return fmt.Errorf("failed to start healthcheck timer: %w", err)
}

if createOpt.Detach {
fmt.Fprintln(createOpt.Stdout, id)
return nil
Expand Down
6 changes: 6 additions & 0 deletions cmd/nerdctl/container/container_run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,9 @@ func TestRunDomainname(t *testing.T) {
}

func TestRunHealthcheckFlags(t *testing.T) {
if rootlessutil.IsRootless() {
t.Skip("healthcheck tests are skipped in rootless environment")
}
testCase := nerdtest.Setup()

testCases := []struct {
Expand Down Expand Up @@ -990,6 +993,9 @@ func TestRunHealthcheckFlags(t *testing.T) {
}

func TestRunHealthcheckFromImage(t *testing.T) {
if rootlessutil.IsRootless() {
t.Skip("healthcheck tests are skipped in rootless environment")
}
nerdtest.Setup()

dockerfile := fmt.Sprintf(`FROM %s
Expand Down
6 changes: 1 addition & 5 deletions cmd/nerdctl/helpers/flagutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
options.HealthInterval != 0 ||
options.HealthTimeout != 0 ||
options.HealthRetries != 0 ||
options.HealthStartPeriod != 0 ||
options.HealthStartInterval != 0
options.HealthStartPeriod != 0

if options.NoHealthcheck {
if options.HealthCmd != "" || healthFlagsSet {
Expand All @@ -74,9 +73,6 @@ func ValidateHealthcheckFlags(options types.ContainerCreateOptions) error {
if options.HealthStartPeriod < 0 {
return fmt.Errorf("--health-start-period cannot be negative")
}
if options.HealthStartInterval < 0 {
return fmt.Errorf("--health-start-interval cannot be negative")
}
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ require (
github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 // indirect
github.com/sasha-s/go-deadlock v0.3.5 // indirect
//gomodjail:unconfined
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/sirupsen/logrus v1.9.3
github.com/smallstep/pkcs7 v0.1.1 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect
Expand Down
13 changes: 6 additions & 7 deletions pkg/api/types/container_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,12 @@ type ContainerCreateOptions struct {
ImagePullOpt ImagePullOptions

// Healthcheck related fields
HealthCmd string
HealthInterval time.Duration
HealthTimeout time.Duration
HealthRetries int
HealthStartPeriod time.Duration
HealthStartInterval time.Duration
NoHealthcheck bool
HealthCmd string
HealthInterval time.Duration
HealthTimeout time.Duration
HealthRetries int
HealthStartPeriod time.Duration
NoHealthcheck bool

// UserNS name for user namespace mapping of container
UserNS string
Expand Down
3 changes: 0 additions & 3 deletions pkg/cmd/container/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -891,9 +891,6 @@ func withHealthcheck(options types.ContainerCreateOptions, ensuredImage *imgutil
if options.HealthStartPeriod != 0 {
hc.StartPeriod = options.HealthStartPeriod
}
if options.HealthStartInterval != 0 {
hc.StartInterval = options.HealthStartInterval
}

// If no healthcheck config is set (via CLI or image), return an empty string so we skip adding it to the container config.
if reflect.DeepEqual(hc, &healthcheck.Healthcheck{}) {
Expand Down
1 change: 0 additions & 1 deletion pkg/cmd/container/health_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ func HealthCheck(ctx context.Context, client *containerd.Client, container conta
hcConfig.Interval = timeoutWithDefault(hcConfig.Interval, healthcheck.DefaultProbeInterval)
hcConfig.Timeout = timeoutWithDefault(hcConfig.Timeout, healthcheck.DefaultProbeTimeout)
hcConfig.StartPeriod = timeoutWithDefault(hcConfig.StartPeriod, healthcheck.DefaultStartPeriod)
hcConfig.StartInterval = timeoutWithDefault(hcConfig.StartInterval, healthcheck.DefaultStartInterval)
if hcConfig.Retries == 0 {
hcConfig.Retries = healthcheck.DefaultProbeRetries
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/cmd/container/kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/containerd/nerdctl/v2/pkg/api/types"
"github.com/containerd/nerdctl/v2/pkg/clientutil"
"github.com/containerd/nerdctl/v2/pkg/containerutil"
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
"github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker"
"github.com/containerd/nerdctl/v2/pkg/labels"
"github.com/containerd/nerdctl/v2/pkg/netutil"
Expand Down Expand Up @@ -111,6 +112,11 @@ func killContainer(ctx context.Context, container containerd.Container, signal s
return err
}

// Clean up healthcheck systemd units
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, container); err != nil {
log.G(ctx).Warnf("failed to clean up healthcheck units for container %s: %s", container.ID(), err)
}

// signal will be sent once resume is finished
if paused {
if err := task.Resume(ctx); err != nil {
Expand Down
6 changes: 6 additions & 0 deletions pkg/cmd/container/remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/containerd/nerdctl/v2/pkg/clientutil"
"github.com/containerd/nerdctl/v2/pkg/containerutil"
"github.com/containerd/nerdctl/v2/pkg/dnsutil/hostsstore"
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
"github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker"
"github.com/containerd/nerdctl/v2/pkg/ipcutil"
"github.com/containerd/nerdctl/v2/pkg/labels"
Expand Down Expand Up @@ -179,6 +180,11 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions
// Otherwise, nil the error so that we do not write the error label on the container
retErr = nil

// Clean up healthcheck systemd units
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, c); err != nil {
log.G(ctx).WithError(err).Warnf("failed to clean up healthcheck units for container %q", id)
}

// Now, delete the actual container
var delOpts []containerd.DeleteOpts
if _, err := c.Image(ctx); err == nil {
Expand Down
Loading
Loading