Skip to content

Commit 86510da

Browse files
authored
fix: Add health check endpoint (#116)
* CHANGELOG * fix: add a health check endpoint * CHANGELOG * fix: return wrapped error
1 parent 1acef32 commit 86510da

File tree

5 files changed

+96
-1
lines changed

5 files changed

+96
-1
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
3939

4040
### Improvements
4141

42+
* [#116](https://github.com/babylonlabs-io/covenant-emulator/pull/116) Add health check to Prometheus server.
4243
* [#117](https://github.com/babylonlabs-io/covenant-emulator/pull/117) Add health check on startup, increase gas adjustment and update docs.
4344

45+
4446
## v0.14.0
4547

4648
* [#114](https://github.com/babylonlabs-io/covenant-emulator/pull/114) bump babylon to v1.0.0-rc.8

covenant/covenant.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,3 +610,23 @@ func (ce *CovenantEmulator) Stop() error {
610610
})
611611
return stopErr
612612
}
613+
614+
// CheckReadiness reports whether the covenant emulator is ready to serve requests.
615+
// It first does a non-blocking receive on ce.quit and returns an error if that
616+
// receive succeeds, then calls ce.signer.PubKey() as a connectivity probe.
// It returns nil only when both checks pass; a PubKey failure is wrapped with %w
// so callers can still inspect the underlying error.
// NOTE(review): the quit check can only observe a signalled/closed quit channel;
// whether it also fails before the emulator has been started is not visible
// here — confirm.
617+
func (ce *CovenantEmulator) CheckReadiness() error {
618+
select {
619+
case <-ce.quit:
620+
return fmt.Errorf("emulator is not running")
621+
default:
622+
// Nothing to receive on quit, so the emulator is treated as running.
623+
}
624+
625+
// Check connectivity to the remote signer by calling its PubKey endpoint.
// The returned key is discarded; only the error matters here.
626+
_, err := ce.signer.PubKey()
627+
if err != nil {
628+
return fmt.Errorf("failed to get public key from covenant signer: %w", err)
629+
}
630+
631+
return nil
632+
}

covenant/service/prometheus.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,17 @@ import (
66
"net/http"
77
"time"
88

9+
logger "github.com/rs/zerolog"
10+
911
"github.com/prometheus/client_golang/prometheus/promhttp"
1012
"go.uber.org/zap"
1113
)
1214

15+
// ReadinessChecker defines the interface for checking readiness status.
// PrometheusServer calls CheckReadiness from its /health handler.
16+
type ReadinessChecker interface {
17+
// CheckReadiness returns nil when ready, or an error describing why not.
CheckReadiness() error
18+
}
19+
1320
type PrometheusServer struct {
1421
svr *http.Server
1522

@@ -18,6 +25,8 @@ type PrometheusServer struct {
1825
interval time.Duration
1926

2027
quit chan struct{}
28+
29+
readinessChecker ReadinessChecker
2130
}
2231

2332
func NewPrometheusServer(addr string, interval time.Duration, logger *zap.Logger) *PrometheusServer {
@@ -39,6 +48,37 @@ func NewPrometheusServer(addr string, interval time.Duration, logger *zap.Logger
3948
}
4049
}
4150

51+
// SetReadinessChecker stores rc as the readiness checker and, when the server's
// handler is an *http.ServeMux, registers readinessHandler at /health on it.
// If the handler is any other type, no endpoint is registered and nothing is logged.
// NOTE(review): every call registers the /health pattern again, and net/http's
// ServeMux panics on a duplicate pattern, so this looks safe to call only
// once per server — confirm.
52+
func (ps *PrometheusServer) SetReadinessChecker(rc ReadinessChecker) {
53+
ps.readinessChecker = rc
54+
55+
// Add the readiness endpoint only after the checker is set
56+
if mux, ok := ps.svr.Handler.(*http.ServeMux); ok {
57+
mux.HandleFunc("/health", ps.readinessHandler)
58+
ps.logger.Info("Readiness endpoint registered at /health")
59+
}
60+
}
61+
62+
// readinessHandler handles the /health endpoint requests.
// It responds with 500 if no readiness checker is set, with 503 and the error
// text if CheckReadiness fails, and with 200 and the body "Healthy" otherwise.
63+
func (ps *PrometheusServer) readinessHandler(w http.ResponseWriter, r *http.Request) {
64+
if ps.readinessChecker == nil {
65+
ps.logger.Error("Readiness checker not configured")
66+
http.Error(w, "Readiness checker not configured", http.StatusInternalServerError)
67+
return
68+
}
69+
70+
if err := ps.readinessChecker.CheckReadiness(); err != nil {
71+
ps.logger.Error("Readiness check failed", zap.Error(err))
72+
http.Error(w, err.Error(), http.StatusServiceUnavailable)
73+
return
74+
}
75+
76+
w.WriteHeader(http.StatusOK)
77+
if _, err := w.Write([]byte("Healthy")); err != nil {
78+
// NOTE(review): logger here is the zerolog package alias, not ps.logger (zap)
// used above; the write error goes to whatever zerolog logger r.Context()
// carries — confirm one is attached, otherwise this message may be dropped.
logger.Ctx(r.Context()).Err(err).Msg("failed to write response")
79+
}
80+
}
81+
4282
func (ps *PrometheusServer) Start() {
4383
ps.logger.Info("Starting Prometheus server",
4484
zap.String("address", ps.svr.Addr))

covenant/service/server.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ func (s *CovenantServer) RunUntilShutdown() error {
4444
}
4545

4646
ps := NewPrometheusServer(promAddr, metricsCfg.UpdateInterval, s.logger)
47+
// Set the covenant emulator as the readiness checker
48+
ps.SetReadinessChecker(s.ce)
4749

4850
defer func() {
4951
ps.Stop()

docs/covenant-emulator-setup.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,35 @@ covd start
258258
```
259259

260260
All the available CLI options can be viewed using the `--help` flag. These
261-
options can also be set in the configuration file.
261+
options can also be set in the configuration file.
262+
263+
## 6. Monitoring and Health Checks
264+
265+
### 6.1. Readiness Check Endpoint
266+
267+
The covenant emulator provides a health check endpoint at `/health` that can be used
268+
by monitoring systems to determine if the service is ready to handle requests.
269+
270+
The endpoint can be accessed through the same address as the Prometheus metrics endpoint,
271+
which is configured in `covd.conf`:
272+
273+
```
274+
http://<prometheusAddr>/health
275+
```
276+
277+
The endpoint returns:
278+
- `HTTP 200 OK` with response body "Healthy" when the emulator is running properly
279+
and can connect to its dependencies
280+
- `HTTP 503 Service Unavailable` with an error message when the emulator is not ready
281+
282+
The readiness check verifies:
283+
1. Whether the emulator's main loop is running
284+
2. Connectivity to the remote signer by calling its PubKey endpoint
285+
286+
#### Example Usage
287+
288+
To check the emulator's readiness:
289+
290+
```bash
291+
curl -v http://127.0.0.1:2112/health
292+
```

0 commit comments

Comments
 (0)