Skip to content

Commit 2af75a4

Browse files
authored
Merge pull request #9 from kilnfi/feat/add-status-watcher
feat: add status watcher
2 parents 26d4976 + 3ead556 commit 2af75a4

17 files changed

+595
-54
lines changed

.gitignore

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
1+
2+
3+
# Binaries for programs and plugins
4+
*.exe
5+
*.exe~
6+
*.dll
7+
*.so
8+
*.dylib
9+
10+
# configuration resources that should be kept locally
111
config.yaml
2-
.codegpt
3-
bin/
412
config/
5-
*.db*
13+
exp/
14+
15+
# Test binary, built with `go test -c`
16+
*.test
17+
18+
# Output of the go coverage tool
19+
*.out
20+
21+
# Release
22+
dist/
23+
24+
# Dependency directories (remove the comment below to include it)
25+
bin/
26+
output/
27+
testbin/
28+
29+
# Others
30+
**/.DS_Store
31+
*.db*

cmd/watcher/app/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ type Config struct {
1717
BlockWatcherConfig BlockWatcherConfig `mapstructure:"block-watcher"`
1818
PoolWatcherConfig PoolWatcherConfig `mapstructure:"pool-watcher"`
1919
NetworkWatcherConfig NetworkWatcherConfig `mapstructure:"network-watcher"`
20+
StatusWatcherConfig StatusWatcherConfig `mapstructure:"status-watcher"`
2021
}
2122

2223
type BlockWatcherConfig struct {
@@ -34,6 +35,10 @@ type NetworkWatcherConfig struct {
3435
RefreshInterval int `mapstructure:"refresh-interval"`
3536
}
3637

38+
type StatusWatcherConfig struct {
39+
RefreshInterval int `mapstructure:"refresh-interval"`
40+
}
41+
3742
type HTTPConfig struct {
3843
Host string `mapstructure:"host"`
3944
Port int `mapstructure:"port"`

cmd/watcher/app/watcher.go

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func NewWatcherCommand() *cobra.Command {
9090
cmd.Flags().StringP("blockfrost-endpoint", "", "", "blockfrost API endpoint")
9191
cmd.Flags().IntP("blockfrost-max-routines", "", 10, "number of routines used by blockfrost to perform concurrent actions")
9292
cmd.Flags().IntP("blockfrost-timeout", "", 60, "Timeout for requests to the Blockfrost API (in seconds)")
93+
cmd.Flags().IntP("status-watcher-refresh-interval", "", 15, "Interval at which the status watcher collects data about the network (in seconds)")
9394
cmd.Flags().BoolP("network-watcher-enabled", "", true, "Enable network watcher")
9495
cmd.Flags().IntP("network-watcher-refresh-interval", "", 60, "Interval at which the network watcher collects data about the network (in seconds)")
9596
cmd.Flags().BoolP("pool-watcher-enabled", "", true, "Enable pool watcher")
@@ -112,10 +113,12 @@ func NewWatcherCommand() *cobra.Command {
112113
checkError(viper.BindPFlag("blockfrost.timeout", cmd.Flag("blockfrost-timeout")), "unable to bind blockfrost-timeout flag")
113114
checkError(viper.BindPFlag("network-watcher.enabled", cmd.Flag("network-watcher-enabled")), "unable to bind network-watcher-enabled flag")
114115
checkError(viper.BindPFlag("network-watcher.refresh-interval", cmd.Flag("network-watcher-refresh-interval")), "unable to bind network-watcher-refresh-interval flag")
116+
checkError(viper.BindPFlag("status-watcher.refresh-interval", cmd.Flag("status-watcher-refresh-interval")), "unable to bind status-watcher-refresh-interval flag")
115117
checkError(viper.BindPFlag("pool-watcher.enabled", cmd.Flag("pool-watcher-enabled")), "unable to bind pool-watcher-enabled flag")
116118
checkError(viper.BindPFlag("pool-watcher.refresh-interval", cmd.Flag("pool-watcher-refresh-interval")), "unable to bind pool-watcher-refresh-interval flag")
117119
checkError(viper.BindPFlag("block-watcher.enabled", cmd.Flag("block-watcher-enabled")), "unable to bind block-watcher-enabled flag")
118120
checkError(viper.BindPFlag("block-watcher.refresh-interval", cmd.Flag("block-watcher-refresh-interval")), "unable to bind block-watcher-refresh-interval flag")
121+
119122
return cmd
120123
}
121124

@@ -197,24 +200,29 @@ func run(_ *cobra.Command, _ []string) error {
197200
return fmt.Errorf("unable to refresh slot leaders: %w", err)
198201
}
199202

203+
healthStore := watcher.NewHealthStore()
204+
200205
// Start HTTP server
201-
if err := startHTTPServer(eg, registry); err != nil {
206+
if err := startHTTPServer(eg, registry, healthStore); err != nil {
202207
return fmt.Errorf("unable to start http server: %w", err)
203208
}
204209

210+
// Start Status Watcher
211+
startStatusWatcher(ctx, eg, cardano, blockfrost, metrics, healthStore)
212+
205213
// Start Pool Watcher
206214
if cfg.PoolWatcherConfig.Enabled {
207-
startPoolWatcher(ctx, eg, blockfrost, metrics, cfg.Pools)
215+
startPoolWatcher(ctx, eg, blockfrost, metrics, cfg.Pools, healthStore)
208216
}
209217

210218
// Start Block Watcher
211219
if cfg.BlockWatcherConfig.Enabled {
212-
startBlockWatcher(ctx, eg, cardano, blockfrost, slotLeaderService, metrics, cfg.Pools, database.DB)
220+
startBlockWatcher(ctx, eg, cardano, blockfrost, slotLeaderService, metrics, cfg.Pools, database.DB, healthStore)
213221
}
214222

215223
// Start Network Watcher
216224
if cfg.NetworkWatcherConfig.Enabled {
217-
startNetworkWatcher(ctx, eg, blockfrost, metrics)
225+
startNetworkWatcher(ctx, eg, blockfrost, metrics, healthStore)
218226
}
219227

220228
<-ctx.Done()
@@ -259,11 +267,12 @@ func createCardanoClient(blockfrost blockfrost.Client) cardano.CardanoClient {
259267
return cardanocli.NewClient(opts, blockfrost, &cardanocli.RealCommandExecutor{})
260268
}
261269

262-
func startHTTPServer(eg *errgroup.Group, registry *prometheus.Registry) error {
270+
func startHTTPServer(eg *errgroup.Group, registry *prometheus.Registry, healthStore *watcher.HealthStore) error {
263271
var err error
264272

265273
server, err = http.New(
266274
registry,
275+
healthStore,
267276
http.WithHost(cfg.HTTP.Host),
268277
http.WithPort(cfg.HTTP.Port),
269278
)
@@ -286,13 +295,36 @@ func startHTTPServer(eg *errgroup.Group, registry *prometheus.Registry) error {
286295
return nil
287296
}
288297

298+
// startStatusWatcher starts the status watcher service
299+
func startStatusWatcher(
300+
ctx context.Context,
301+
eg *errgroup.Group,
302+
cardano cardano.CardanoClient,
303+
blockfrost blockfrost.Client,
304+
metrics *metrics.Collection,
305+
healthStore *watcher.HealthStore,
306+
) {
307+
eg.Go(func() error {
308+
statusWatcher := watcher.NewStatusWatcher(blockfrost, cardano, metrics, healthStore)
309+
logger.Info(
310+
"starting watcher",
311+
slog.String("component", "status-watcher"),
312+
)
313+
if err := statusWatcher.Start(ctx); err != nil {
314+
return fmt.Errorf("unable to start status watcher: %w", err)
315+
}
316+
return nil
317+
})
318+
}
319+
289320
// startPoolWatcher starts the pool watcher service
290321
func startPoolWatcher(
291322
ctx context.Context,
292323
eg *errgroup.Group,
293324
blockfrost blockfrost.Client,
294325
metrics *metrics.Collection,
295326
pools pools.Pools,
327+
healthStore *watcher.HealthStore,
296328
) {
297329
eg.Go(func() error {
298330
options := watcher.PoolWatcherOptions{
@@ -303,7 +335,7 @@ func startPoolWatcher(
303335
"starting watcher",
304336
slog.String("component", "pool-watcher"),
305337
)
306-
poolWatcher, err := watcher.NewPoolWatcher(blockfrost, metrics, pools, options)
338+
poolWatcher, err := watcher.NewPoolWatcher(blockfrost, metrics, pools, healthStore, options)
307339
if err != nil {
308340
return fmt.Errorf("unable to create pool watcher: %w", err)
309341
}
@@ -314,12 +346,12 @@ func startPoolWatcher(
314346
})
315347
}
316348

317-
// startNetworkWatcher starts the network watcher service
318349
func startNetworkWatcher(
319350
ctx context.Context,
320351
eg *errgroup.Group,
321352
blockfrost blockfrost.Client,
322353
metrics *metrics.Collection,
354+
healthStore *watcher.HealthStore,
323355
) {
324356
eg.Go(func() error {
325357
options := watcher.NetworkWatcherOptions{
@@ -331,7 +363,7 @@ func startNetworkWatcher(
331363
"starting watcher",
332364
slog.String("component", "network-watcher"),
333365
)
334-
networkWatcher := watcher.NewNetworkWatcher(blockfrost, metrics, options)
366+
networkWatcher := watcher.NewNetworkWatcher(blockfrost, metrics, healthStore, options)
335367
if err := networkWatcher.Start(ctx); err != nil {
336368
return fmt.Errorf("unable to start network watcher: %w", err)
337369
}
@@ -349,12 +381,13 @@ func startBlockWatcher(
349381
metrics *metrics.Collection,
350382
pools pools.Pools,
351383
db *sqlx.DB,
384+
healthStore *watcher.HealthStore,
352385
) {
353386
eg.Go(func() error {
354387
options := watcher.BlockWatcherOptions{
355388
RefreshInterval: time.Second * time.Duration(cfg.BlockWatcherConfig.RefreshInterval),
356389
}
357-
blockWatcher := watcher.NewBlockWatcher(cardano, blockfrost, sl, pools, metrics, db, options)
390+
blockWatcher := watcher.NewBlockWatcher(cardano, blockfrost, sl, pools, metrics, db, healthStore, options)
358391
logger.Info(
359392
"starting watcher",
360393
slog.String("component", "block-watcher"),

internal/metrics/metrics.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type Collection struct {
2727
ExpectedBlocks *prometheus.GaugeVec
2828
LatestSlotProcessedByBlockWatcher prometheus.Gauge
2929
NextSlotLeader *prometheus.GaugeVec
30+
HealthStatus prometheus.Gauge
3031
}
3132

3233
func NewCollection() *Collection {
@@ -189,6 +190,13 @@ func NewCollection() *Collection {
189190
},
190191
[]string{"pool_name", "pool_id", "pool_instance", "epoch"},
191192
),
193+
HealthStatus: prometheus.NewGauge(
194+
prometheus.GaugeOpts{
195+
Namespace: "cardano_validator_watcher",
196+
Name: "health_status",
197+
Help: "Health status of the Cardano validator watcher: 1 = healthy, 0 = unhealthy",
198+
},
199+
),
192200
}
193201
}
194202

@@ -198,14 +206,14 @@ func (m *Collection) MustRegister(reg prometheus.Registerer) {
198206
reg.MustRegister(m.ChainID)
199207
reg.MustRegister(m.EpochDuration)
200208
reg.MustRegister(m.NetworkEpoch)
201-
reg.MustRegister(m.NextEpochStartTime)
202209
reg.MustRegister(m.NetworkBlockHeight)
203210
reg.MustRegister(m.NetworkSlot)
204211
reg.MustRegister(m.NetworkEpochSlot)
205212
reg.MustRegister(m.NetworkTotalPools)
206213
reg.MustRegister(m.NetworkCurrentEpochProposedBlocks)
207214
reg.MustRegister(m.NetworkActiveStake)
208215
reg.MustRegister(m.RelaysPerPool)
216+
reg.MustRegister(m.NextEpochStartTime)
209217
reg.MustRegister(m.PoolsPledgeMet)
210218
reg.MustRegister(m.PoolsSaturationLevel)
211219
reg.MustRegister(m.MonitoredValidatorsCount)
@@ -216,4 +224,5 @@ func (m *Collection) MustRegister(reg prometheus.Registerer) {
216224
reg.MustRegister(m.ExpectedBlocks)
217225
reg.MustRegister(m.LatestSlotProcessedByBlockWatcher)
218226
reg.MustRegister(m.NextSlotLeader)
227+
reg.MustRegister(m.HealthStatus)
219228
}

internal/metrics/metrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func TestMustRegister(t *testing.T) {
3333
metrics.MustRegister(registry)
3434

3535
// The expected number of metrics to be registered, based on the definitions provided in the Collection struct.
36-
expectedMetricsCount := 21
36+
expectedMetricsCount := 22
3737

3838
var totalRegisteredMetrics int
3939
size, _ := registry.Gather()

internal/server/http/handlers.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,21 @@ package http
33
import (
44
"log/slog"
55
"net/http"
6+
7+
"github.com/kilnfi/cardano-validator-watcher/internal/watcher"
68
)
79

810
// Handler represents the HTTP handlers for the server
911
type Handler struct {
10-
logger *slog.Logger
12+
logger *slog.Logger
13+
healthStore *watcher.HealthStore
1114
}
1215

1316
// NewHandler returns a new Handler
14-
func NewHandler(logger *slog.Logger) *Handler {
17+
func NewHandler(logger *slog.Logger, healthStore *watcher.HealthStore) *Handler {
1518
return &Handler{
16-
logger: logger,
19+
logger: logger,
20+
healthStore: healthStore,
1721
}
1822
}
1923

@@ -38,6 +42,10 @@ func (h *Handler) LiveProbe(w http.ResponseWriter, _ *http.Request) {
3842
// If the service is ready, it returns a 200 OK status
3943
// If the service is not ready, it returns a 500 Internal Server Error status
4044
func (h *Handler) ReadyProbe(w http.ResponseWriter, _ *http.Request) {
45+
if !h.healthStore.GetHealth() {
46+
http.Error(w, "Health KO", http.StatusInternalServerError)
47+
return
48+
}
4149
w.WriteHeader(http.StatusOK)
4250
_, _ = w.Write([]byte("Health OK"))
4351
}

internal/server/http/handlers_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"testing"
77

88
"github.com/kilnfi/cardano-validator-watcher/internal/metrics"
9+
"github.com/kilnfi/cardano-validator-watcher/internal/watcher"
910
"github.com/prometheus/client_golang/prometheus"
1011
"github.com/stretchr/testify/assert"
1112
"github.com/stretchr/testify/require"
@@ -20,8 +21,10 @@ func TestDefaultHandler(t *testing.T) {
2021
r := httptest.NewRequest(http.MethodGet, "/", nil)
2122
w := httptest.NewRecorder()
2223

24+
healthStore := watcher.NewHealthStore()
2325
server, err := New(
2426
nil,
27+
healthStore,
2528
)
2629

2730
require.NoError(t, err)
@@ -35,8 +38,10 @@ func TestDefaultHandler(t *testing.T) {
3538
r := httptest.NewRequest(http.MethodGet, "/fake", nil)
3639
w := httptest.NewRecorder()
3740

41+
healthStore := watcher.NewHealthStore()
3842
server, err := New(
3943
nil,
44+
healthStore,
4045
)
4146

4247
require.NoError(t, err)
@@ -54,8 +59,10 @@ func TestLiveProbe(t *testing.T) {
5459
r := httptest.NewRequest(http.MethodGet, "/livez", nil)
5560
w := httptest.NewRecorder()
5661

62+
healthStore := watcher.NewHealthStore()
5763
server, err := New(
5864
nil,
65+
healthStore,
5966
)
6067
require.NoError(t, err)
6168
server.router.ServeHTTP(w, r)
@@ -73,14 +80,33 @@ func TestReadyProbe(t *testing.T) {
7380
r := httptest.NewRequest(http.MethodGet, "/readyz", nil)
7481
w := httptest.NewRecorder()
7582

83+
healthStore := watcher.NewHealthStore()
84+
healthStore.SetHealth(true)
7685
server, err := New(
7786
nil,
87+
healthStore,
7888
)
7989
require.NoError(t, err)
8090
server.router.ServeHTTP(w, r)
8191

8292
assert.Equal(t, http.StatusOK, w.Code)
8393
})
94+
95+
t.Run("SadPath_ReadyProbeIsNotReady", func(t *testing.T) {
96+
r := httptest.NewRequest(http.MethodGet, "/readyz", nil)
97+
w := httptest.NewRecorder()
98+
99+
healthStore := watcher.NewHealthStore()
100+
healthStore.SetHealth(false)
101+
server, err := New(
102+
nil,
103+
healthStore,
104+
)
105+
require.NoError(t, err)
106+
server.router.ServeHTTP(w, r)
107+
108+
assert.Equal(t, http.StatusInternalServerError, w.Code)
109+
})
84110
}
85111

86112
func TestMetricsHandler(t *testing.T) {
@@ -96,8 +122,10 @@ func TestMetricsHandler(t *testing.T) {
96122
metrics := metrics.NewCollection()
97123
metrics.MustRegister(registry)
98124

125+
healthStore := watcher.NewHealthStore()
99126
server, err := New(
100127
registry,
128+
healthStore,
101129
)
102130

103131
require.NoError(t, err)

0 commit comments

Comments
 (0)