Skip to content

Commit f8ffc5a

Browse files
authored
fix: restore Docker HEALTHCHECK with built-in --health-check flag (#385)
## Summary

- Add a `--health-check` flag to the binary that performs an HTTP GET against `/health/live`
- Restore the `HEALTHCHECK` instruction in the Dockerfile (works with distroless - no shell/curl needed)
- Remove `healthcheck: disable: true` from compose.yml
- Add an integration test for Docker health check verification

## Background

The previous HEALTHCHECK was removed in #383 because the `--health-check` flag didn't exist in the binary. This PR implements the flag properly so the health check actually works.

## Test plan

- [x] Built Docker image and verified container becomes healthy
- [x] Tested that the `/health/live` endpoint returns `{"status":"alive"}`
- [x] Added integration test `TestDockerHealthCheck` that verifies the full flow
2 parents ecf4e15 + 295e3b7 commit f8ffc5a

File tree

4 files changed

+243
-3
lines changed

4 files changed

+243
-3
lines changed

Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,5 +150,9 @@ COPY --from=backend-builder \
150150
# Set user to nonroot for security (UID 65532)
151151
USER nonroot:nonroot
152152

153+
# Health check using built-in --health-check flag (works in distroless without shell)
154+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
155+
CMD ["/ldap-passwd", "--health-check"]
156+
153157
# Use vector form for ENTRYPOINT as recommended by distroless docs
154158
ENTRYPOINT ["/ldap-passwd"]

cmd/ldap-manager/main.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package main
44

55
import (
66
"context"
7+
"net/http"
78
"os"
89
"os/signal"
910
"syscall"
@@ -17,9 +18,18 @@ import (
1718
"github.com/netresearch/ldap-manager/internal/web"
1819
)
1920

20-
const shutdownTimeout = 30 * time.Second
21+
const (
	// healthCheckEndpoint is the liveness URL requested by runHealthCheck.
	healthCheckEndpoint = "http://localhost:3000/health/live"

	// healthCheckTimeout bounds the whole health-check HTTP request.
	healthCheckTimeout = 3 * time.Second

	// shutdownTimeout is the time budget for shutting the application down.
	shutdownTimeout = 30 * time.Second
)
2126

2227
func main() {
28+
// Handle --health-check flag early, before any other initialization
29+
if len(os.Args) == 2 && os.Args[1] == "--health-check" {
30+
os.Exit(runHealthCheck())
31+
}
32+
2333
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
2434

2535
log.Info().Msgf("LDAP Manager %s starting...", version.FormatVersion())
@@ -74,3 +84,29 @@ func main() {
7484

7585
log.Info().Msg("Graceful shutdown complete")
7686
}
87+
88+
// runHealthCheck performs an HTTP health check against the running application.
89+
// Returns 0 if healthy (HTTP 200), 1 otherwise.
90+
// Used by Docker HEALTHCHECK to verify the application is running correctly.
91+
func runHealthCheck() int {
92+
ctx, cancel := context.WithTimeout(context.Background(), healthCheckTimeout)
93+
defer cancel()
94+
95+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthCheckEndpoint, nil)
96+
if err != nil {
97+
return 1
98+
}
99+
100+
client := &http.Client{}
101+
resp, err := client.Do(req)
102+
if err != nil {
103+
return 1
104+
}
105+
defer func() { _ = resp.Body.Close() }()
106+
107+
if resp.StatusCode == http.StatusOK {
108+
return 0
109+
}
110+
111+
return 1
112+
}

compose.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@ services:
105105
- /etc/ssl/certs:/etc/ssl/certs:ro
106106
- /usr/local/share/ca-certificates:/usr/local/share/ca-certificates:ro
107107
restart: unless-stopped
108-
healthcheck:
109-
disable: true
110108
profiles:
111109
- prod
112110
ports:
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//go:build integration
2+
3+
package integration
4+
5+
import (
6+
"context"
7+
"encoding/json"
8+
"fmt"
9+
"net/http"
10+
"os"
11+
"testing"
12+
"time"
13+
14+
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
16+
"github.com/testcontainers/testcontainers-go"
17+
"github.com/testcontainers/testcontainers-go/wait"
18+
)
19+
20+
// TestDockerHealthCheck verifies the Docker HEALTHCHECK directive works correctly
21+
// by building and running the production container and checking its health status.
22+
//
23+
// Note: This test builds the Docker image from source, which requires Docker BuildKit
24+
// and may not work in all CI environments (e.g., GitHub Actions runners have platform
25+
// detection issues with testcontainers). Run locally with: make test-integration
26+
func TestDockerHealthCheck(t *testing.T) {
27+
if testing.Short() {
28+
t.Skip("Skipping Docker healthcheck test in short mode")
29+
}
30+
31+
// Skip in CI environments where Docker-in-Docker building has platform issues
32+
if os.Getenv("CI") == "true" || os.Getenv("GITHUB_ACTIONS") == "true" {
33+
t.Skip("Skipping Docker build test in CI - run locally with: make test-integration")
34+
}
35+
36+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
37+
defer cancel()
38+
39+
// Start OpenLDAP first
40+
ldapContainer, err := StartOpenLDAP(ctx, DefaultOpenLDAPConfig())
41+
require.NoError(t, err, "Failed to start OpenLDAP container")
42+
defer func() { _ = ldapContainer.Stop(ctx) }()
43+
44+
// Wait for LDAP to be ready and seed test data
45+
time.Sleep(2 * time.Second)
46+
err = ldapContainer.SeedTestData(ctx)
47+
require.NoError(t, err, "Failed to seed test data")
48+
49+
// Get LDAP container network info
50+
ldapNetwork, err := ldapContainer.Container.ContainerIP(ctx)
51+
require.NoError(t, err, "Failed to get LDAP container IP")
52+
53+
// Build and start the ldap-manager container
54+
appContainer, appPort, err := startLDAPManagerContainer(ctx, t, ldapNetwork, ldapContainer)
55+
require.NoError(t, err, "Failed to start ldap-manager container")
56+
defer func() { _ = appContainer.Terminate(ctx) }()
57+
58+
t.Run("container becomes healthy", func(t *testing.T) {
59+
// The container should become healthy via Docker HEALTHCHECK
60+
// We wait for up to 60 seconds (start_period=5s, interval=30s, so first check at ~35s)
61+
healthy := waitForContainerHealth(ctx, t, appContainer, 60*time.Second)
62+
assert.True(t, healthy, "Container should become healthy")
63+
})
64+
65+
t.Run("liveness endpoint returns 200", func(t *testing.T) {
66+
resp, err := doHTTPGet(ctx, fmt.Sprintf("http://localhost:%s/health/live", appPort))
67+
require.NoError(t, err, "Failed to call liveness endpoint")
68+
defer func() { _ = resp.Body.Close() }()
69+
70+
assert.Equal(t, http.StatusOK, resp.StatusCode, "Liveness endpoint should return 200")
71+
72+
var result map[string]interface{}
73+
err = json.NewDecoder(resp.Body).Decode(&result)
74+
require.NoError(t, err, "Failed to decode response")
75+
assert.Equal(t, "alive", result["status"], "Status should be 'alive'")
76+
})
77+
78+
t.Run("health endpoint returns details", func(t *testing.T) {
79+
resp, err := doHTTPGet(ctx, fmt.Sprintf("http://localhost:%s/health", appPort))
80+
require.NoError(t, err, "Failed to call health endpoint")
81+
defer func() { _ = resp.Body.Close() }()
82+
83+
assert.Equal(t, http.StatusOK, resp.StatusCode, "Health endpoint should return 200")
84+
85+
var result map[string]interface{}
86+
err = json.NewDecoder(resp.Body).Decode(&result)
87+
require.NoError(t, err, "Failed to decode response")
88+
assert.Contains(t, result, "cache", "Response should contain cache info")
89+
assert.Contains(t, result, "connection_pool", "Response should contain connection pool info")
90+
assert.Contains(t, result, "overall_healthy", "Response should contain overall health status")
91+
})
92+
93+
t.Run("readiness endpoint returns ready", func(t *testing.T) {
94+
// Wait a bit for cache warmup
95+
time.Sleep(5 * time.Second)
96+
97+
resp, err := doHTTPGet(ctx, fmt.Sprintf("http://localhost:%s/health/ready", appPort))
98+
require.NoError(t, err, "Failed to call readiness endpoint")
99+
defer func() { _ = resp.Body.Close() }()
100+
101+
// Readiness might be 200 (ready) or 503 (warming up) - both are valid
102+
assert.Contains(t, []int{http.StatusOK, http.StatusServiceUnavailable}, resp.StatusCode,
103+
"Readiness endpoint should return 200 or 503")
104+
})
105+
}
106+
107+
// startLDAPManagerContainer builds and starts the ldap-manager production container.
108+
func startLDAPManagerContainer(
109+
ctx context.Context,
110+
t *testing.T,
111+
ldapIP string,
112+
ldap *OpenLDAPContainer,
113+
) (testcontainers.Container, string, error) {
114+
t.Helper()
115+
116+
req := testcontainers.ContainerRequest{
117+
FromDockerfile: testcontainers.FromDockerfile{
118+
Context: "../..",
119+
Dockerfile: "Dockerfile",
120+
BuildArgs: map[string]*string{
121+
"BUILD_DATE": strPtr(time.Now().UTC().Format(time.RFC3339)),
122+
"VCS_REF": strPtr("test"),
123+
},
124+
KeepImage: true,
125+
},
126+
ExposedPorts: []string{"3000/tcp"},
127+
Env: map[string]string{
128+
"LDAP_SERVER": fmt.Sprintf("ldap://%s:389", ldapIP),
129+
"LDAP_BASE_DN": ldap.BaseDN,
130+
"LDAP_READONLY_USER": ldap.AdminDN,
131+
"LDAP_READONLY_PASSWORD": ldap.AdminPass,
132+
"SESSION_SECRET": "test-session-secret-for-integration-tests",
133+
"LOG_LEVEL": "debug",
134+
},
135+
WaitingFor: wait.ForHTTP("/health/live").
136+
WithPort("3000/tcp").
137+
WithStartupTimeout(90 * time.Second),
138+
}
139+
140+
container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
141+
ContainerRequest: req,
142+
Started: true,
143+
})
144+
if err != nil {
145+
return nil, "", fmt.Errorf("failed to start ldap-manager container: %w", err)
146+
}
147+
148+
port, err := container.MappedPort(ctx, "3000")
149+
if err != nil {
150+
_ = container.Terminate(ctx)
151+
152+
return nil, "", fmt.Errorf("failed to get mapped port: %w", err)
153+
}
154+
155+
return container, port.Port(), nil
156+
}
157+
158+
// waitForContainerHealth polls the container health status until healthy or timeout.
159+
func waitForContainerHealth(
160+
ctx context.Context,
161+
t *testing.T,
162+
container testcontainers.Container,
163+
timeout time.Duration,
164+
) bool {
165+
t.Helper()
166+
167+
deadline := time.Now().Add(timeout)
168+
for time.Now().Before(deadline) {
169+
state, err := container.State(ctx)
170+
if err != nil {
171+
t.Logf("Error getting container state: %v", err)
172+
time.Sleep(2 * time.Second)
173+
174+
continue
175+
}
176+
177+
if state.Health != nil && state.Health.Status == "healthy" {
178+
t.Logf("Container is healthy after %v", timeout-time.Until(deadline))
179+
180+
return true
181+
}
182+
183+
t.Logf("Container health status: %v", state.Health)
184+
time.Sleep(2 * time.Second)
185+
}
186+
187+
return false
188+
}
189+
190+
// doHTTPGet issues a GET request for url through http.DefaultClient, bound to
// ctx. The caller is responsible for closing the body of a returned response.
func doHTTPGet(ctx context.Context, url string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if buildErr != nil {
		return nil, buildErr
	}

	response, sendErr := http.DefaultClient.Do(request)

	return response, sendErr
}
199+
200+
// strPtr returns a pointer to a fresh copy of s, for APIs that take *string.
func strPtr(s string) *string {
	p := new(string)
	*p = s

	return p
}

0 commit comments

Comments
 (0)