Skip to content

Commit 410cca3

Browse files
authored
pkg/services/promhealth: new isolated package for prometheus health reporting (#1097)
1 parent 4aeae90 commit 410cca3

File tree

3 files changed

+110
-35
lines changed

3 files changed

+110
-35
lines changed

pkg/loop/server.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/smartcontractkit/chainlink-common/pkg/logger"
1717
"github.com/smartcontractkit/chainlink-common/pkg/services"
1818
"github.com/smartcontractkit/chainlink-common/pkg/settings/limits"
19+
"github.com/smartcontractkit/chainlink-common/pkg/services/promhealth"
1920
"github.com/smartcontractkit/chainlink-common/pkg/sqlutil"
2021
"github.com/smartcontractkit/chainlink-common/pkg/sqlutil/pg"
2122
)
@@ -153,7 +154,7 @@ func (s *Server) start() error {
153154
return fmt.Errorf("error starting prometheus server: %w", err)
154155
}
155156

156-
s.checker = services.NewChecker("", "")
157+
s.checker = promhealth.NewChecker("", "")
157158
if err := s.checker.Start(); err != nil {
158159
return fmt.Errorf("error starting health checker: %w", err)
159160
}

pkg/services/health.go

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ import (
88
"sync"
99
"testing"
1010
"time"
11-
12-
"github.com/prometheus/client_golang/prometheus"
13-
"github.com/prometheus/client_golang/prometheus/promauto"
1411
)
1512

1613
// HealthReporter should be implemented by any type requiring health checks.
@@ -52,7 +49,7 @@ type HealthChecker struct {
5249
chStop chan struct{}
5350
chDone chan struct{}
5451

55-
ver, sha string
52+
cfg HealthCheckerConfig
5653

5754
servicesMu sync.RWMutex
5855
services map[string]HealthReporter
@@ -64,34 +61,12 @@ type HealthChecker struct {
6461

6562
const interval = 15 * time.Second
6663

67-
var (
68-
healthStatus = promauto.NewGaugeVec(
69-
prometheus.GaugeOpts{
70-
Name: "health",
71-
Help: "Health status by service",
72-
},
73-
[]string{"service_id"},
74-
)
75-
uptimeSeconds = promauto.NewCounter(
76-
prometheus.CounterOpts{
77-
Name: "uptime_seconds",
78-
Help: "Uptime of the application measured in seconds",
79-
},
80-
)
81-
version = promauto.NewCounterVec(
82-
prometheus.CounterOpts{
83-
Name: "version",
84-
Help: "Application version information",
85-
},
86-
[]string{"version", "commit"},
87-
)
88-
)
89-
90-
// Deprecated: Use NewHealthChecker
64+
// Deprecated: Use HealthCheckerConfig.New or a helper like promhealth.NewChecker for the old behavior.
9165
func NewChecker(ver, sha string) *HealthChecker {
9266
return NewHealthChecker(ver, sha)
9367
}
9468

69+
// Deprecated: Use HealthCheckerConfig.New or a helper like promhealth.NewChecker for the old behavior.
9570
func NewHealthChecker(ver, sha string) *HealthChecker {
9671
if ver == "" || sha == "" {
9772
if bi, ok := debug.ReadBuildInfo(); ok {
@@ -106,9 +81,55 @@ func NewHealthChecker(ver, sha string) *HealthChecker {
10681
if len(sha) > 7 {
10782
sha = sha[:7]
10883
}
84+
return HealthCheckerConfig{Ver: ver, Sha: sha}.New()
85+
}
86+
87+
// HealthCheckerConfig configures a HealthChecker created via New.
//
// Every field is optional.
type HealthCheckerConfig struct {
	// Ver and Sha optionally override the values read from debug.BuildInfo.
	// Sha is truncated to at most 7 characters.
	Ver, Sha string

	// Optional hooks for reporting. Any hook left nil is replaced by a no-op.

	// IncVersion is called with the resolved version and sha.
	IncVersion func(ver string, sha string)
	// AddUptime is called with an amount of additional uptime.
	AddUptime func(duration time.Duration)
	// SetStatus is called with a service name and its status value.
	SetStatus func(name string, status int)
	// Delete is called with the name of a service being removed.
	Delete func(name string)
}

// initVerSha fills in an empty Ver and/or Sha from debug.ReadBuildInfo, when
// build info is available, and then truncates Sha to at most 7 characters.
//
// The receiver is a pointer so that the assignments reach the caller's
// config; with a value receiver they would be applied to a copy and lost.
func (cfg *HealthCheckerConfig) initVerSha() {
	if cfg.Ver == "" || cfg.Sha == "" {
		if bi, ok := debug.ReadBuildInfo(); ok {
			if cfg.Ver == "" {
				cfg.Ver = bi.Main.Version
			}
			if cfg.Sha == "" {
				cfg.Sha = bi.Main.Sum
			}
		}
	}
	if len(cfg.Sha) > 7 {
		cfg.Sha = cfg.Sha[:7]
	}
}

// setNoopHooks replaces every nil hook with a function that does nothing, so
// the hooks can be invoked without nil checks. Hooks that are already set are
// left untouched.
//
// The receiver is a pointer so that the installed no-ops reach the caller's
// config; with a value receiver the hooks would stay nil and calling them
// would panic.
func (cfg *HealthCheckerConfig) setNoopHooks() {
	if cfg.IncVersion == nil {
		cfg.IncVersion = func(ver, sha string) {}
	}
	if cfg.AddUptime == nil {
		cfg.AddUptime = func(d time.Duration) {}
	}
	if cfg.SetStatus == nil {
		cfg.SetStatus = func(name string, status int) {}
	}
	if cfg.Delete == nil {
		cfg.Delete = func(name string) {}
	}
}
127+
128+
func (cfg HealthCheckerConfig) New() *HealthChecker {
129+
cfg.initVerSha()
130+
cfg.setNoopHooks()
109131
return &HealthChecker{
110-
ver: ver,
111-
sha: sha,
132+
cfg: cfg,
112133
services: make(map[string]HealthReporter, 10),
113134
healthy: make(map[string]error, 10),
114135
ready: make(map[string]error, 10),
@@ -119,7 +140,7 @@ func NewHealthChecker(ver, sha string) *HealthChecker {
119140

120141
func (c *HealthChecker) Start() error {
121142
return c.StartOnce("HealthCheck", func() error {
122-
version.WithLabelValues(c.ver, c.sha).Inc()
143+
c.cfg.IncVersion(c.cfg.Ver, c.cfg.Sha)
123144

124145
// update immediately
125146
c.update()
@@ -175,10 +196,10 @@ func (c *HealthChecker) update() {
175196
}
176197

177198
// report metrics to prometheus
178-
healthStatus.WithLabelValues(name).Set(float64(value))
199+
c.cfg.SetStatus(name, value)
179200
}
180201
}
181-
uptimeSeconds.Add(interval.Seconds())
202+
c.cfg.AddUptime(interval)
182203

183204
// save state
184205
c.stateMu.Lock()
@@ -214,7 +235,7 @@ func (c *HealthChecker) Unregister(name string) error {
214235
c.servicesMu.Lock()
215236
defer c.servicesMu.Unlock()
216237
delete(c.services, name)
217-
healthStatus.DeleteLabelValues(name)
238+
c.cfg.Delete(name)
218239
return nil
219240
}
220241

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package promhealth
2+
3+
import (
4+
"time"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
"github.com/prometheus/client_golang/prometheus/promauto"
8+
9+
"github.com/smartcontractkit/chainlink-common/pkg/services"
10+
)
11+
12+
var (
13+
healthStatus = promauto.NewGaugeVec(
14+
prometheus.GaugeOpts{
15+
Name: "health",
16+
Help: "Health status by service",
17+
},
18+
[]string{"service_id"},
19+
)
20+
uptimeSeconds = promauto.NewCounter(
21+
prometheus.CounterOpts{
22+
Name: "uptime_seconds",
23+
Help: "Uptime of the application measured in seconds",
24+
},
25+
)
26+
version = promauto.NewCounterVec(
27+
prometheus.CounterOpts{
28+
Name: "version",
29+
Help: "Application version information",
30+
},
31+
[]string{"version", "commit"},
32+
)
33+
)
34+
35+
// NewChecker returns a *services.HealthChecker with hooks for prometheus metrics.
36+
func NewChecker(ver, sha string) *services.HealthChecker {
37+
return services.HealthCheckerConfig{
38+
Ver: ver,
39+
Sha: sha,
40+
AddUptime: func(d time.Duration) {
41+
uptimeSeconds.Add(d.Seconds())
42+
},
43+
IncVersion: func(ver string, sha string) {
44+
version.WithLabelValues(ver, sha).Inc()
45+
},
46+
SetStatus: func(name string, value int) {
47+
healthStatus.WithLabelValues(name).Set(float64(value))
48+
},
49+
Delete: func(name string) {
50+
healthStatus.DeleteLabelValues(name)
51+
},
52+
}.New()
53+
}

0 commit comments

Comments
 (0)