Skip to content

Commit 96891ab

Browse files
earl-warrenearl-warren
authored andcommitted
feat: wait for services to be healthy before starting a job (#805)
If a --health-cmd is defined for a container, block until its status is healthy or unhealthy. The timeout is defined by the server internal logic based on associated --health-* defined delays. If it blocks indefinitely, the job timeout will eventually cancel it. While waiting, the simplest solution would be to sleep 1 second until the container is healthy or unhealthy. To minimize log verbosity, the sleep interval is instead set to --health-interval and default to one second if it is not defined. This logic does not apply to host containers as they do not support services. They are assumed to always be healthy. If --health-cmd is set for the container running a job, the first step will start to run without waiting for the container to become healthy. There may be valid use cases for that but they are not the focus of this implementation. <!--start release-notes-assistant--> <!--URL:https://code.forgejo.org/forgejo/runner--> - features - [PR](https://code.forgejo.org/forgejo/runner/pulls/805): <!--number 805 --><!--line 0 --><!--description ZmVhdDogd2FpdCBmb3Igc2VydmljZXMgdG8gYmUgaGVhbHRoeSBiZWZvcmUgc3RhcnRpbmcgYSBqb2I=-->feat: wait for services to be healthy before starting a job<!--description--> <!--end release-notes-assistant--> Reviewed-on: https://code.forgejo.org/forgejo/runner/pulls/805 Co-authored-by: Earl Warren <[email protected]> Co-committed-by: Earl Warren <[email protected]>
1 parent 8644cc9 commit 96891ab

File tree

10 files changed

+240
-2
lines changed

10 files changed

+240
-2
lines changed

act/container/container_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package container
33
import (
44
"context"
55
"io"
6+
"time"
67

78
"code.forgejo.org/forgejo/runner/v9/act/common"
89
"github.com/docker/go-connections/nat"
@@ -63,6 +64,7 @@ type Container interface {
6364
Remove() common.Executor
6465
Close() common.Executor
6566
ReplaceLogWriter(io.Writer, io.Writer) (io.Writer, io.Writer)
67+
IsHealthy(ctx context.Context) (time.Duration, error)
6668
}
6769

6870
// NewDockerBuildExecutorInput the input for the NewDockerBuildExecutor function

act/container/docker_run.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"runtime"
1616
"strconv"
1717
"strings"
18+
"time"
1819

1920
"github.com/Masterminds/semver"
2021
"github.com/docker/cli/cli/compose/loader"
@@ -191,6 +192,47 @@ func (cr *containerReference) Remove() common.Executor {
191192
).IfNot(common.Dryrun)
192193
}
193194

195+
func (cr *containerReference) inspect(ctx context.Context) (container.InspectResponse, error) {
196+
resp, err := cr.cli.ContainerInspect(ctx, cr.id)
197+
if err != nil {
198+
err = fmt.Errorf("service %v: %s", cr.input.NetworkAliases, err)
199+
}
200+
return resp, err
201+
}
202+
203+
func (cr *containerReference) IsHealthy(ctx context.Context) (time.Duration, error) {
204+
resp, err := cr.inspect(ctx)
205+
if err != nil {
206+
return 0, err
207+
}
208+
return cr.isHealthy(ctx, resp)
209+
}
210+
211+
func (cr *containerReference) isHealthy(ctx context.Context, resp container.InspectResponse) (time.Duration, error) {
212+
logger := common.Logger(ctx)
213+
if resp.Config == nil || resp.Config.Healthcheck == nil || resp.State == nil || resp.State.Health == nil || len(resp.Config.Healthcheck.Test) == 1 && strings.EqualFold(resp.Config.Healthcheck.Test[0], "NONE") {
214+
logger.Debugf("no container health check defined, hope for the best")
215+
return 0, nil
216+
}
217+
218+
switch resp.State.Health.Status {
219+
case container.Starting:
220+
wait := resp.Config.Healthcheck.Interval
221+
if wait <= 0 {
222+
wait = time.Second
223+
}
224+
logger.Infof("service %v: container health check %s (%s) is starting, waiting %v", cr.input.NetworkAliases, cr.id, resp.Config.Image, wait)
225+
return wait, nil
226+
case container.Healthy:
227+
logger.Infof("service %v: container health check %s (%s) is healthy", cr.input.NetworkAliases, cr.id, resp.Config.Image)
228+
return 0, nil
229+
case container.Unhealthy:
230+
return 0, fmt.Errorf("service %v: container health check %s (%s) is not healthy", cr.input.NetworkAliases, cr.id, resp.Config.Image)
231+
default:
232+
return 0, fmt.Errorf("service %v: unexpected health status %s (%s) %v", cr.input.NetworkAliases, cr.id, resp.Config.Image, resp.State.Health.Status)
233+
}
234+
}
235+
194236
func (cr *containerReference) ReplaceLogWriter(stdout, stderr io.Writer) (io.Writer, io.Writer) {
195237
out := cr.input.Stdout
196238
err := cr.input.Stderr

act/container/docker_run_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,79 @@ func TestMergeJobOptions(t *testing.T) {
386386
})
387387
}
388388
}
389+
390+
func TestDockerRun_isHealthy(t *testing.T) {
391+
cr := containerReference{
392+
id: "containerid",
393+
input: &NewContainerInput{
394+
NetworkAliases: []string{"servicename"},
395+
},
396+
}
397+
ctx := context.Background()
398+
makeInspectResponse := func(interval time.Duration, status container.HealthStatus, test []string) container.InspectResponse {
399+
return container.InspectResponse{
400+
Config: &container.Config{
401+
Image: "example.com/some/image",
402+
Healthcheck: &container.HealthConfig{
403+
Interval: interval,
404+
Test: test,
405+
},
406+
},
407+
ContainerJSONBase: &container.ContainerJSONBase{
408+
State: &container.State{
409+
Health: &container.Health{
410+
Status: status,
411+
},
412+
},
413+
},
414+
}
415+
}
416+
417+
t.Run("IncompleteResponseOrNoHealthCheck", func(t *testing.T) {
418+
wait, err := cr.isHealthy(ctx, container.InspectResponse{})
419+
assert.Zero(t, wait)
420+
assert.NoError(t, err)
421+
422+
// --no-healthcheck translates into a NONE test command
423+
resp := makeInspectResponse(0, container.NoHealthcheck, []string{"NONE"})
424+
wait, err = cr.isHealthy(ctx, resp)
425+
assert.Zero(t, wait)
426+
assert.NoError(t, err)
427+
})
428+
429+
t.Run("StartingUndefinedIntervalIsNotZero", func(t *testing.T) {
430+
resp := makeInspectResponse(0, container.Starting, nil)
431+
wait, err := cr.isHealthy(ctx, resp)
432+
assert.NotZero(t, wait)
433+
assert.NoError(t, err)
434+
})
435+
436+
t.Run("StartingWithInterval", func(t *testing.T) {
437+
expectedWait := time.Duration(42)
438+
resp := makeInspectResponse(expectedWait, container.Starting, nil)
439+
actualWait, err := cr.isHealthy(ctx, resp)
440+
assert.Equal(t, expectedWait, actualWait)
441+
assert.NoError(t, err)
442+
})
443+
444+
t.Run("Unhealthy", func(t *testing.T) {
445+
resp := makeInspectResponse(0, container.Unhealthy, nil)
446+
wait, err := cr.isHealthy(ctx, resp)
447+
assert.Zero(t, wait)
448+
assert.ErrorContains(t, err, "is not healthy")
449+
})
450+
451+
t.Run("Healthy", func(t *testing.T) {
452+
resp := makeInspectResponse(0, container.Healthy, nil)
453+
wait, err := cr.isHealthy(ctx, resp)
454+
assert.Zero(t, wait)
455+
assert.NoError(t, err)
456+
})
457+
458+
t.Run("UnknownStatus", func(t *testing.T) {
459+
resp := makeInspectResponse(0, container.NoHealthcheck, nil)
460+
wait, err := cr.isHealthy(ctx, resp)
461+
assert.Zero(t, wait)
462+
assert.ErrorContains(t, err, "unexpected")
463+
})
464+
}

act/container/host_environment.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,10 @@ func (e *HostEnvironment) GetRunnerContext(_ context.Context) map[string]interfa
493493
}
494494
}
495495

496+
func (e *HostEnvironment) IsHealthy(ctx context.Context) (time.Duration, error) {
497+
return 0, nil
498+
}
499+
496500
func (e *HostEnvironment) ReplaceLogWriter(stdout, _ io.Writer) (io.Writer, io.Writer) {
497501
org := e.StdOut
498502
e.StdOut = stdout

act/runner/run_context.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,7 @@ func (rc *RunContext) startJobContainer() common.Executor {
610610
Mode: 0o666,
611611
Body: "",
612612
}),
613+
rc.waitForServiceContainers(),
613614
)(ctx)
614615
}
615616
}
@@ -744,6 +745,35 @@ func (rc *RunContext) startServiceContainers(_ string) common.Executor {
744745
}
745746
}
746747

748+
func waitForServiceContainer(ctx context.Context, c container.ExecutionsEnvironment) error {
749+
for {
750+
wait, err := c.IsHealthy(ctx)
751+
if err != nil {
752+
return err
753+
}
754+
if wait == time.Duration(0) {
755+
return nil
756+
}
757+
select {
758+
case <-ctx.Done():
759+
return nil
760+
case <-time.After(wait):
761+
}
762+
}
763+
}
764+
765+
func (rc *RunContext) waitForServiceContainers() common.Executor {
766+
return func(ctx context.Context) error {
767+
execs := []common.Executor{}
768+
for _, c := range rc.ServiceContainers {
769+
execs = append(execs, func(ctx context.Context) error {
770+
return waitForServiceContainer(ctx, c)
771+
})
772+
}
773+
return common.NewParallelExecutor(len(execs), execs...)(ctx)
774+
}
775+
}
776+
747777
func (rc *RunContext) stopServiceContainers() common.Executor {
748778
return func(ctx context.Context) error {
749779
execs := []common.Executor{}

act/runner/run_context_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ package runner
33
import (
44
"cmp"
55
"context"
6+
"errors"
67
"fmt"
78
"os"
89
"runtime"
910
"slices"
1011
"strings"
1112
"testing"
13+
"time"
1214

1315
"code.forgejo.org/forgejo/runner/v9/act/container"
1416
"code.forgejo.org/forgejo/runner/v9/act/exprparser"
@@ -18,6 +20,7 @@ import (
1820
"github.com/docker/go-connections/nat"
1921
log "github.com/sirupsen/logrus"
2022
"github.com/stretchr/testify/assert"
23+
"github.com/stretchr/testify/mock"
2124
"github.com/stretchr/testify/require"
2225
yaml "gopkg.in/yaml.v3"
2326
)
@@ -824,3 +827,44 @@ jobs:
824827
})
825828
}
826829
}
830+
831+
type waitForServiceContainerMock struct {
832+
mock.Mock
833+
container.Container
834+
container.LinuxContainerEnvironmentExtensions
835+
}
836+
837+
func (o *waitForServiceContainerMock) IsHealthy(ctx context.Context) (time.Duration, error) {
838+
args := o.Called(ctx)
839+
return args.Get(0).(time.Duration), args.Error(1)
840+
}
841+
842+
func Test_waitForServiceContainer(t *testing.T) {
843+
t.Run("Wait", func(t *testing.T) {
844+
m := &waitForServiceContainerMock{}
845+
ctx := context.Background()
846+
mock.InOrder(
847+
m.On("IsHealthy", ctx).Return(1*time.Millisecond, nil).Once(),
848+
m.On("IsHealthy", ctx).Return(time.Duration(0), nil).Once(),
849+
)
850+
require.NoError(t, waitForServiceContainer(ctx, m))
851+
m.AssertExpectations(t)
852+
})
853+
854+
t.Run("Cancel", func(t *testing.T) {
855+
m := &waitForServiceContainerMock{}
856+
ctx, cancel := context.WithCancel(context.Background())
857+
cancel()
858+
m.On("IsHealthy", ctx).Return(1*time.Millisecond, nil).Once()
859+
require.NoError(t, waitForServiceContainer(ctx, m))
860+
m.AssertExpectations(t)
861+
})
862+
863+
t.Run("Error", func(t *testing.T) {
864+
m := &waitForServiceContainerMock{}
865+
ctx := context.Background()
866+
m.On("IsHealthy", ctx).Return(time.Duration(0), errors.New("ERROR"))
867+
require.ErrorContains(t, waitForServiceContainer(ctx, m), "ERROR")
868+
m.AssertExpectations(t)
869+
})
870+
}

act/runner/runner_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,8 @@ func TestRunner_RunEvent(t *testing.T) {
322322
// services
323323
{workdir, "services", "push", "", platforms, secrets},
324324
{workdir, "services-with-container", "push", "", platforms, secrets},
325+
{workdir, "mysql-service-container-with-health-check", "push", "", platforms, secrets},
326+
{workdir, "mysql-service-container-premature-terminate", "push", "service [maindb]", platforms, secrets},
325327
}
326328

327329
for _, table := range tables {
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: service-container
2+
on: push
3+
jobs:
4+
service-container-test:
5+
runs-on: ubuntu-latest
6+
container: code.forgejo.org/oci/mysql:8.4
7+
services:
8+
maindb:
9+
image: code.forgejo.org/oci/mysql:8.4
10+
# This container should immediately exit due to missing env variable for password config. ... [ERROR]
11+
# [Entrypoint]: Database is uninitialized and password option is not specified You need to specify one of the
12+
# following as an environment variable:
13+
# - MYSQL_ROOT_PASSWORD
14+
# - MYSQL_ALLOW_EMPTY_PASSWORD
15+
# - MYSQL_RANDOM_ROOT_PASSWORD
16+
#
17+
# This container should retain the same health check config as the mysql-service-container-with-health-check
18+
# case.
19+
options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
20+
steps:
21+
- run: exit 100 # should never be hit since service will never be healthy
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: service-container
2+
on: push
3+
jobs:
4+
service-container-test:
5+
runs-on: ubuntu-latest
6+
container: code.forgejo.org/oci/mysql:8.4
7+
services:
8+
maindb:
9+
image: code.forgejo.org/oci/mysql:8.4
10+
env:
11+
MYSQL_DATABASE: dbname
12+
MYSQL_USER: dbuser
13+
MYSQL_PASSWORD: dbpass
14+
MYSQL_RANDOM_ROOT_PASSWORD: yes
15+
options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
16+
steps:
17+
- run: mysql -u dbuser -D dbname -pdbpass -h maindb -e "create table T(id INT NOT NULL AUTO_INCREMENT, val VARCHAR(255), PRIMARY KEY (id))"

act/runner/testdata/services/push.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
runs-on: ubuntu-latest
77
services:
88
postgres:
9-
image: code.forgejo.org/oci/bitnami/postgresql:16
9+
image: code.forgejo.org/oci/postgres:16
1010
env:
1111
POSTGRES_USER: runner
1212
POSTGRES_PASSWORD: mysecretdbpass
@@ -15,7 +15,7 @@ jobs:
1515
--health-cmd pg_isready
1616
--health-interval 10s
1717
--health-timeout 5s
18-
--health-retries 5
18+
--health-retries 20
1919
ports:
2020
- 5432:5432
2121
steps:

0 commit comments

Comments
 (0)