@@ -3,18 +3,29 @@ package metrics
33import (
44 "context"
55 "errors"
6+ "net"
67 "net/http"
78 "strconv"
89 "time"
910
1011 "github.com/prometheus/client_golang/prometheus/promhttp"
1112 "github.com/rs/zerolog"
13+ "github.com/rs/zerolog/log"
14+
15+ "github.com/onflow/flow-go/module/component"
16+ "github.com/onflow/flow-go/module/irrecoverable"
1217)
1318
// metricsServerShutdownTimeout bounds how long a graceful shutdown of the
// metrics HTTP server may take.
const metricsServerShutdownTimeout time.Duration = 5 * time.Second
21+
1422// Server is the http server that will be serving the /metrics request for prometheus
1523type Server struct {
16- server * http.Server
17- log zerolog.Logger
24+ component.Component
25+
26+ address string
27+ server * http.Server
28+ log zerolog.Logger
1829}
1930
2031// NewServer creates a new server that will start on the specified port,
@@ -25,44 +36,71 @@ func NewServer(log zerolog.Logger, port uint) *Server {
2536 mux := http .NewServeMux ()
2637 endpoint := "/metrics"
2738 mux .Handle (endpoint , promhttp .Handler ())
28- log .Info ().Str ("address" , addr ).Str ("endpoint" , endpoint ).Msg ("metrics server started" )
2939
3040 m := & Server {
31- server : & http.Server {Addr : addr , Handler : mux },
32- log : log ,
41+ address : addr ,
42+ server : & http.Server {Addr : addr , Handler : mux },
43+ log : log .With ().Str ("address" , addr ).Str ("endpoint" , endpoint ).Logger (),
3344 }
3445
46+ m .Component = component .NewComponentManagerBuilder ().
47+ AddWorker (m .serve ).
48+ AddWorker (m .shutdownOnContextDone ).
49+ Build ()
50+
3551 return m
3652}
3753
38- // Ready returns a channel that will close when the network stack is ready.
39- func (m * Server ) Ready () <- chan struct {} {
40- ready := make (chan struct {})
41- go func () {
42- if err := m .server .ListenAndServe (); err != nil {
43- // http.ErrServerClosed is returned when Close or Shutdown is called
44- // we don't consider this an error, so print this with debug level instead
45- if errors .Is (err , http .ErrServerClosed ) {
46- m .log .Debug ().Err (err ).Msg ("metrics server shutdown" )
47- } else {
48- m .log .Err (err ).Msg ("error shutting down metrics server" )
49- }
54+ func (m * Server ) serve (ctx irrecoverable.SignalerContext , ready component.ReadyFunc ) {
55+ m .log .Info ().Msg ("starting metrics server on address" )
56+
57+ l , err := net .Listen ("tcp" , m .address )
58+ if err != nil {
59+ m .log .Err (err ).Msg ("failed to start the metrics server" )
60+ ctx .Throw (err )
61+ return
62+ }
63+
64+ ready ()
65+
66+ // pass the signaler context to the server so that the signaler context
67+ // can control the server's lifetime
68+ m .server .BaseContext = func (_ net.Listener ) context.Context {
69+ return ctx
70+ }
71+
72+ err = m .server .Serve (l ) // blocking call
73+ if err != nil {
74+ if errors .Is (err , http .ErrServerClosed ) {
75+ return
5076 }
51- }()
52- go func () {
53- close (ready )
54- }()
55- return ready
77+ log .Err (err ).Msg ("fatal error in the metrics server" )
78+ ctx .Throw (err )
79+ }
5680}
5781
58- // Done returns a channel that will close when shutdown is complete.
59- func (m * Server ) Done () <- chan struct {} {
60- done := make (chan struct {})
61- go func () {
62- ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
63- _ = m .server .Shutdown (ctx )
64- cancel ()
65- close (done )
66- }()
67- return done
82+ func (m * Server ) shutdownOnContextDone (ictx irrecoverable.SignalerContext , ready component.ReadyFunc ) {
83+ ready ()
84+ <- ictx .Done ()
85+
86+ ctx , cancel := context .WithTimeout (context .Background (), metricsServerShutdownTimeout )
87+ defer cancel ()
88+
89+ // shutdown the server gracefully
90+ err := m .server .Shutdown (ctx )
91+ if err == nil {
92+ m .log .Info ().Msg ("metrics server graceful shutdown completed" )
93+ return
94+ }
95+
96+ if errors .Is (err , ctx .Err ()) {
97+ m .log .Warn ().Msg ("metrics server graceful shutdown timed out" )
98+ // shutdown the server forcefully
99+ err := m .server .Close ()
100+ if err != nil {
101+ m .log .Err (err ).Msg ("error closing metrics server" )
102+ }
103+ } else {
104+ m .log .Err (err ).Msg ("error shutting down metrics server" )
105+ }
68106}
0 commit comments