Skip to content

Commit da5ee4c

Browse files
fix(prometheus): add graceful shutdown for server (#137)
fix: add graceful shutdown for Prometheus metrics server

Refactor Prometheus server to match pprof pattern:
- Use dedicated http.ServeMux instead of default mux
- Manage server lifecycle with errgroup
- Add graceful shutdown handler with 5s timeout
- Proper error propagation instead of fire-and-forget goroutine

This ensures the Prometheus server shuts down cleanly on SIGTERM/SIGINT and errors are properly tracked by the errgroup.
1 parent 4168185 commit da5ee4c

File tree

1 file changed

+28
-8
lines changed

1 file changed

+28
-8
lines changed

cmd/root.go

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -124,16 +124,36 @@ func Execute() error {
124124
prometheus.MustRegister(csbouncer.TotalLAPICalls, csbouncer.TotalLAPIError, metrics.TotalActiveDecisions, metrics.TotalBlockedRequests, metrics.TotalProcessedRequests)
125125

126126
if config.PrometheusConfig.Enabled {
127-
go func() {
128-
http.Handle("/metrics", promhttp.Handler())
127+
promMux := http.NewServeMux()
128+
promMux.Handle("/metrics", promhttp.Handler())
129129

130-
listenOn := net.JoinHostPort(
131-
config.PrometheusConfig.ListenAddress,
132-
config.PrometheusConfig.ListenPort,
133-
)
130+
listenOn := net.JoinHostPort(
131+
config.PrometheusConfig.ListenAddress,
132+
config.PrometheusConfig.ListenPort,
133+
)
134+
135+
promServer := &http.Server{
136+
Addr: listenOn,
137+
Handler: promMux,
138+
}
139+
140+
g.Go(func() error {
134141
log.Infof("Serving metrics at %s", listenOn+"/metrics")
135-
log.Error(http.ListenAndServe(listenOn, nil))
136-
}()
142+
if err := promServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
143+
return fmt.Errorf("prometheus server error: %w", err)
144+
}
145+
return nil
146+
})
147+
148+
g.Go(func() error {
149+
<-ctx.Done()
150+
log.Info("Shutting down prometheus server...")
151+
// Use background context since parent ctx is already canceled
152+
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
153+
defer cancel()
154+
//nolint:contextcheck // parent ctx is canceled, need fresh context for shutdown
155+
return promServer.Shutdown(shutdownCtx)
156+
})
137157
}
138158

139159
// pprof debug endpoint for runtime profiling (memory, CPU, goroutines)

0 commit comments

Comments
 (0)