Skip to content

Commit a5b49e1

Browse files
subashkotha authored and coderbirju committed
feat: automate healthchecks
Signed-off-by: Subash Kotha <[email protected]>
1 parent d775a8c commit a5b49e1

File tree

10 files changed

+541
-0
lines changed

10 files changed

+541
-0
lines changed

cmd/nerdctl/container/container_health_check_test.go

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,3 +602,122 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
602602

603603
testCase.Run(t)
604604
}
605+
606+
// TestHealthCheck_SystemdIntegration_Basic is intended to cover healthchecks
// that are triggered by systemd timers.
//
// NOTE: every subtest below is currently commented out, so SubTests is an
// empty slice and this test asserts nothing yet.
//
// NOTE(review) before re-enabling the commented-out cases:
//   - "Kill stops healthcheck execution": the NilError call inside the loop
//     re-checks the err returned by parsing FinishedAt; nothing is parsed per
//     log entry at that point.
//   - "Pause/unpause halts and resumes healthcheck execution": Expected reads
//     the labels "logCountBeforePause" and "logCountAfterUnpause", but Setup
//     only sets "cID", "healthStatus" and "logCount" and never pauses or
//     unpauses the container. The Atoi errors are discarded as well, so both
//     counts would presumably be 0 — confirm and fix before enabling.
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
607+
testCase := nerdtest.Setup()
608+
testCase.Require = require.Not(nerdtest.Docker)
609+
610+
testCase.SubTests = []*test.Case{
611+
//{
612+
// Description: "Basic healthy container with systemd-triggered healthcheck",
613+
// Setup: func(data test.Data, helpers test.Helpers) {
614+
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
615+
// "--health-cmd", "echo healthy",
616+
// "--health-interval", "2s",
617+
// testutil.CommonImage, "sleep", "30")
618+
// // Wait for a couple of healthchecks to execute
619+
// time.Sleep(5 * time.Second)
620+
// },
621+
// Cleanup: func(data test.Data, helpers test.Helpers) {
622+
// helpers.Anyhow("rm", "-f", data.Identifier())
623+
// },
624+
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
625+
// return &test.Expected{
626+
// ExitCode: 0,
627+
// Output: expect.All(func(stdout, _ string, t *testing.T) {
628+
// inspect := nerdtest.InspectContainer(helpers, data.Identifier())
629+
// h := inspect.State.Health
630+
// assert.Assert(t, h != nil, "expected health state to be present")
631+
// assert.Equal(t, h.Status, "healthy")
632+
// assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
633+
// }),
634+
// }
635+
// },
636+
//},
637+
//{
638+
// Description: "Kill stops healthcheck execution",
639+
// Setup: func(data test.Data, helpers test.Helpers) {
640+
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
641+
// "--health-cmd", "echo healthy",
642+
// "--health-interval", "1s",
643+
// testutil.CommonImage, "sleep", "30")
644+
// time.Sleep(5 * time.Second) // Wait for at least one health check to execute
645+
// helpers.Ensure("kill", data.Identifier()) // Kill the container
646+
// time.Sleep(3 * time.Second) // Wait to allow any potential extra healthchecks (shouldn't happen)
647+
// },
648+
// Cleanup: func(data test.Data, helpers test.Helpers) {
649+
// helpers.Anyhow("rm", "-f", data.Identifier())
650+
// },
651+
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
652+
// return &test.Expected{
653+
// ExitCode: 0,
654+
// Output: expect.All(func(stdout, _ string, t *testing.T) {
655+
// inspect := nerdtest.InspectContainer(helpers, data.Identifier())
656+
// h := inspect.State.Health
657+
// assert.Assert(t, h != nil, "expected health state to be present")
658+
// assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
659+
//
660+
// // Get container FinishedAt timestamp
661+
// containerEnd, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
662+
// assert.NilError(t, err, "parsing container FinishedAt")
663+
//
664+
// // Assert all healthcheck log start times are before container finished
665+
// for _, entry := range h.Log {
666+
// assert.NilError(t, err, "parsing healthcheck Start time")
667+
// assert.Assert(t, entry.Start.Before(containerEnd), "healthcheck ran after container was killed")
668+
// }
669+
// }),
670+
// }
671+
// },
672+
//},
673+
674+
// {
675+
// Description: "Pause/unpause halts and resumes healthcheck execution",
676+
// Setup: func(data test.Data, helpers test.Helpers) {
677+
// data.Labels().Set("cID", data.Identifier())
678+
// helpers.Ensure("run", "-d", "--name", data.Identifier(),
679+
// "--health-cmd", "echo healthy",
680+
// "--health-interval", "1s",
681+
// testutil.CommonImage, "sleep", "30")
682+
// time.Sleep(4 * time.Second)
683+
684+
// // Inspect using raw command
685+
// helpers.Command("container", "inspect", data.Labels().Get("cID")).
686+
// Run(&test.Expected{
687+
// ExitCode: expect.ExitCodeNoCheck,
688+
// Output: func(stdout string, _ string, t *testing.T) {
689+
// var dc []dockercompat.Container
690+
// err := json.Unmarshal([]byte(stdout), &dc)
691+
// assert.NilError(t, err)
692+
// assert.Equal(t, len(dc), 1)
693+
// h := dc[0].State.Health
694+
// assert.Assert(t, h != nil, "expected health state to be present")
695+
// data.Labels().Set("healthStatus", h.Status)
696+
// data.Labels().Set("logCount", strconv.Itoa(len(h.Log)))
697+
// fmt.Printf("📋 Setup Inspect: Status=%s, LogCount=%s\n", h.Status, strconv.Itoa(len(h.Log)))
698+
// },
699+
// })
700+
// },
701+
// Cleanup: func(data test.Data, helpers test.Helpers) {
702+
// helpers.Anyhow("rm", "-f", data.Identifier())
703+
// },
704+
// Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
705+
// return &test.Expected{
706+
// ExitCode: 0,
707+
// Output: expect.All(func(stdout, _ string, t *testing.T) {
708+
// before := data.Labels().Get("logCountBeforePause")
709+
// after := data.Labels().Get("logCountAfterUnpause")
710+
711+
// beforeCount, _ := strconv.Atoi(before)
712+
// afterCount, _ := strconv.Atoi(after)
713+
714+
// assert.Assert(t, afterCount > beforeCount,
715+
// "expected more healthchecks after unpause (got %d → %d)", beforeCount, afterCount)
716+
// }),
717+
// }
718+
// },
719+
// },
720+
}
721+
722+
testCase.Run(t)
723+
}

cmd/nerdctl/container/container_run.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package container
1919
import (
2020
"errors"
2121
"fmt"
22+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
2223
"runtime"
2324
"strings"
2425

@@ -439,6 +440,14 @@ func runAction(cmd *cobra.Command, args []string) error {
439440
return err
440441
}
441442

443+
// Setup container healthchecks.
444+
if err := healthcheck.CreateTimer(ctx, c); err != nil {
445+
return fmt.Errorf("failed to create healthcheck timer: %w", err)
446+
}
447+
if err := healthcheck.StartTimer(ctx, c); err != nil {
448+
return fmt.Errorf("failed to start healthcheck timer: %w", err)
449+
}
450+
442451
if createOpt.Detach {
443452
fmt.Fprintln(createOpt.Stdout, id)
444453
return nil

pkg/cmd/container/kill.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"github.com/containerd/nerdctl/v2/pkg/api/types"
3636
"github.com/containerd/nerdctl/v2/pkg/clientutil"
3737
"github.com/containerd/nerdctl/v2/pkg/containerutil"
38+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
3839
"github.com/containerd/nerdctl/v2/pkg/idutil/containerwalker"
3940
"github.com/containerd/nerdctl/v2/pkg/labels"
4041
"github.com/containerd/nerdctl/v2/pkg/netutil"
@@ -111,6 +112,11 @@ func killContainer(ctx context.Context, container containerd.Container, signal s
111112
return err
112113
}
113114

115+
// Clean up healthcheck systemd units
116+
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, container); err != nil {
117+
log.G(ctx).Warnf("failed to clean up healthcheck units for container %s: %s", container.ID(), err)
118+
}
119+
114120
// signal will be sent once resume is finished
115121
if paused {
116122
if err := task.Resume(ctx); err != nil {

pkg/cmd/container/remove.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"encoding/json"
2222
"errors"
2323
"fmt"
24+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
2425
"os"
2526
"syscall"
2627

@@ -179,6 +180,11 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions
179180
// Otherwise, nil the error so that we do not write the error label on the container
180181
retErr = nil
181182

183+
// Clean up healthcheck systemd units
184+
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, c); err != nil {
185+
log.G(ctx).WithError(err).Warnf("failed to clean up healthcheck units for container %q", id)
186+
}
187+
182188
// Now, delete the actual container
183189
var delOpts []containerd.DeleteOpts
184190
if _, err := c.Image(ctx); err == nil {

pkg/containerutil/containerutil.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
"github.com/containerd/nerdctl/v2/pkg/consoleutil"
4949
"github.com/containerd/nerdctl/v2/pkg/errutil"
5050
"github.com/containerd/nerdctl/v2/pkg/formatter"
51+
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
5152
"github.com/containerd/nerdctl/v2/pkg/ipcutil"
5253
"github.com/containerd/nerdctl/v2/pkg/labels"
5354
"github.com/containerd/nerdctl/v2/pkg/labels/k8slabels"
@@ -283,6 +284,15 @@ func Start(ctx context.Context, container containerd.Container, isAttach bool, i
283284
if err := task.Start(ctx); err != nil {
284285
return err
285286
}
287+
288+
// If container has health checks configured, create and start systemd timer/service files.
289+
if err := healthcheck.CreateTimer(ctx, container); err != nil {
290+
return fmt.Errorf("failed to create healthcheck timer: %w", err)
291+
}
292+
if err := healthcheck.StartTimer(ctx, container); err != nil {
293+
return fmt.Errorf("failed to start healthcheck timer: %w", err)
294+
}
295+
286296
if !isAttach {
287297
return nil
288298
}
@@ -351,6 +361,11 @@ func Stop(ctx context.Context, container containerd.Container, timeout *time.Dur
351361
}
352362
}()
353363

364+
// Clean up healthcheck units if configured.
365+
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, container); err != nil {
366+
return fmt.Errorf("failed to clean up healthcheck units for container %s", container.ID())
367+
}
368+
354369
if timeout == nil {
355370
t, ok := l[labels.StopTimeout]
356371
if !ok {
@@ -489,6 +504,11 @@ func Pause(ctx context.Context, client *containerd.Client, id string) error {
489504
return err
490505
}
491506

507+
// Clean up healthcheck units if configured.
508+
if err := healthcheck.RemoveTransientHealthCheckFiles(ctx, container); err != nil {
509+
return fmt.Errorf("failed to clean up healthcheck units for container %s", container.ID())
510+
}
511+
492512
switch status.Status {
493513
case containerd.Paused:
494514
return fmt.Errorf("container %s is already paused", id)
@@ -516,6 +536,14 @@ func Unpause(ctx context.Context, client *containerd.Client, id string) error {
516536
return err
517537
}
518538

539+
// Recreate healthcheck related systemd timer/service files.
540+
if err := healthcheck.CreateTimer(ctx, container); err != nil {
541+
return fmt.Errorf("failed to create healthcheck timer: %w", err)
542+
}
543+
if err := healthcheck.StartTimer(ctx, container); err != nil {
544+
return fmt.Errorf("failed to start healthcheck timer: %w", err)
545+
}
546+
519547
switch status.Status {
520548
case containerd.Paused:
521549
return task.Resume(ctx)
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package healthcheck
18+
19+
import (
20+
"github.com/containerd/nerdctl/mod/tigron/expect"
21+
"github.com/containerd/nerdctl/mod/tigron/require"
22+
"github.com/containerd/nerdctl/mod/tigron/test"
23+
"github.com/containerd/nerdctl/v2/pkg/testutil"
24+
"github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest"
25+
"gotest.tools/v3/assert"
26+
"testing"
27+
"time"
28+
)
29+
30+
// TestHealthCheck_SystemdIntegration_Basic starts a detached container with
// "--health-cmd echo healthy" and a 2s health interval, sleeps for 10s, and
// then asserts that inspecting the container reports a non-nil Health state
// whose status is "healthy" and whose log has at least one entry. Cleanup
// force-removes the container.
//
// Require is set to require.Not(nerdtest.Docker) — presumably this skips the
// test when the target is Docker; confirm against nerdtest.
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
31+
testCase := nerdtest.Setup()
32+
testCase.Require = require.Not(nerdtest.Docker)
33+
34+
testCase.SubTests = []*test.Case{
35+
{
36+
Description: "Basic healthy container with systemd-triggered healthcheck",
37+
Setup: func(data test.Data, helpers test.Helpers) {
38+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
39+
"--health-cmd", "echo healthy",
40+
"--health-interval", "2s",
41+
testutil.CommonImage, "sleep", "30")
42+
// Wait for a couple of healthchecks to execute (fixed delay, 5x the 2s interval).
43+
time.Sleep(10 * time.Second)
44+
},
45+
Cleanup: func(data test.Data, helpers test.Helpers) {
46+
helpers.Anyhow("rm", "-f", data.Identifier())
47+
},
48+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
49+
return helpers.Command("inspect", data.Identifier())
50+
},
51+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
52+
return &test.Expected{
53+
ExitCode: 0,
54+
Output: expect.All(func(stdout, _ string, t *testing.T) {
// stdout of the inspect Command above is not used here; the container
// is inspected again through the nerdtest helper.
55+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
56+
h := inspect.State.Health
57+
assert.Assert(t, h != nil, "expected health state to be present")
58+
assert.Equal(t, h.Status, "healthy")
59+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
60+
}),
61+
}
62+
},
63+
},
64+
}
65+
66+
testCase.Run(t)
67+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package healthcheck
18+
19+
import (
20+
"context"
21+
containerd "github.com/containerd/containerd/v2/client"
22+
)
23+
24+
// CreateTimer is meant to set up the transient systemd timer and service for
// healthchecks.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
25+
func CreateTimer(ctx context.Context, container containerd.Container) error {
26+
return nil
27+
}
28+
29+
// StartTimer is meant to start the healthcheck timer unit.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
30+
func StartTimer(ctx context.Context, container containerd.Container) error {
31+
return nil
32+
}
33+
34+
// RemoveTransientHealthCheckFiles is meant to stop and clean up the transient
// timer and service.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
35+
func RemoveTransientHealthCheckFiles(ctx context.Context, container containerd.Container) error {
36+
return nil
37+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package healthcheck
18+
19+
import (
20+
"context"
21+
containerd "github.com/containerd/containerd/v2/client"
22+
)
23+
24+
// CreateTimer is meant to set up the transient systemd timer and service for
// healthchecks.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
25+
func CreateTimer(ctx context.Context, container containerd.Container) error {
26+
return nil
27+
}
28+
29+
// StartTimer is meant to start the healthcheck timer unit.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
30+
func StartTimer(ctx context.Context, container containerd.Container) error {
31+
return nil
32+
}
33+
34+
// RemoveTransientHealthCheckFiles is meant to stop and clean up the transient
// timer and service.
//
// NOTE: as written here it is a no-op placeholder: ctx and container are
// unused and the function always returns nil.
35+
func RemoveTransientHealthCheckFiles(ctx context.Context, container containerd.Container) error {
36+
return nil
37+
}

0 commit comments

Comments
 (0)