Skip to content

Commit 56ce74d

Browse files
authored
feat: add WebSocket-specific Prometheus metrics (#374)
1 parent ad040ee commit 56ce74d

File tree

2 files changed

+80
-5
lines changed

2 files changed

+80
-5
lines changed

docs/metrics.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,18 @@ These are custom application metrics specific to the QuickPizza application, imp
6868

6969
- `quickpizza_server_http_request_duration_seconds_gauge`: Duration of HTTP request processing (Gauge).
7070

71-
- `quickpizza_server_http_requests_total`: Total number of HTTP requests received (Counter metric).
71+
- `quickpizza_server_http_requests_total`: Total number of HTTP requests received (Counter metric).
72+
73+
## QuickPizza WebSocket Metrics
74+
75+
`quickpizza_server_ws_*`
76+
77+
These metrics track WebSocket connection lifecycle and message processing. They are separate from HTTP metrics because WebSocket connections are long-lived and would skew HTTP latency results.
78+
79+
- `quickpizza_server_ws_connections_active`: Number of currently active WebSocket connections (Gauge).
80+
81+
- `quickpizza_server_ws_connection_duration_seconds`: Duration of WebSocket connections in seconds (Native Histogram).
82+
83+
- `quickpizza_server_ws_messages_received_total`: Total number of messages received via WebSocket (Counter).
84+
85+
- `quickpizza_server_ws_message_processing_duration_seconds`: Time in seconds to process and broadcast incoming WebSocket messages (Native Histogram).

pkg/http/http.go

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,41 @@ var (
122122
Name: "http_request_duration_seconds_gauge",
123123
Help: "The duration of HTTP requests (Gauge)",
124124
}, []string{"method", "path", "status"})
125+
126+
// WebSocket metrics
127+
wsConnectionsActive = promauto.NewGauge(prometheus.GaugeOpts{
128+
Namespace: "quickpizza",
129+
Subsystem: "server",
130+
Name: "ws_connections_active",
131+
Help: "Number of active WebSocket connections",
132+
})
133+
134+
wsConnectionDuration = promauto.NewHistogram(prometheus.HistogramOpts{
135+
Namespace: "quickpizza",
136+
Subsystem: "server",
137+
Name: "ws_connection_duration_seconds",
138+
Help: "Duration of WebSocket connections",
139+
NativeHistogramBucketFactor: 1.1,
140+
NativeHistogramMaxBucketNumber: 100,
141+
NativeHistogramMinResetDuration: 1 * time.Hour,
142+
})
143+
144+
wsMessagesReceived = promauto.NewCounter(prometheus.CounterOpts{
145+
Namespace: "quickpizza",
146+
Subsystem: "server",
147+
Name: "ws_messages_received_total",
148+
Help: "Total number of messages received via WebSocket",
149+
})
150+
151+
wsMessageProcessingDuration = promauto.NewHistogram(prometheus.HistogramOpts{
152+
Namespace: "quickpizza",
153+
Subsystem: "server",
154+
Name: "ws_message_processing_duration_seconds",
155+
Help: "Time to process and broadcast incoming WebSocket messages",
156+
NativeHistogramBucketFactor: 1.1,
157+
NativeHistogramMaxBucketNumber: 100,
158+
NativeHistogramMinResetDuration: 1 * time.Hour,
159+
})
125160
)
126161

127162
// PizzaRecommendation is the object returned by the /api/pizza endpoint.
@@ -401,7 +436,7 @@ func (s *Server) AddConfigHandler(config map[string]string) {
401436
// TODO: So far the gateway only handles a few endpoints.
402437
func (s *Server) AddGateway(catalogUrl, copyUrl, wsUrl, recommendationsUrl, configUrl string) {
403438
s.router.Group(func(r chi.Router) {
404-
s.traceInstaller.Install(r, "gateway")
439+
s.traceInstaller.Install(r, "gateway", excludeWebSocketFromOTel())
405440

406441
// Generate client traces for requests proxied by the gateway.
407442
otelTransport := otelhttp.NewTransport(
@@ -457,7 +492,7 @@ func (s *Server) AddGateway(catalogUrl, copyUrl, wsUrl, recommendationsUrl, conf
457492
// AddWebSocket enables serving and handle websockets.
458493
func (s *Server) AddWebSocket() {
459494
s.router.Group(func(r chi.Router) {
460-
s.traceInstaller.Install(r, "ws")
495+
s.traceInstaller.Install(r, "ws", excludeWebSocketFromOTel())
461496

462497
r.Get("/ws", func(w http.ResponseWriter, r *http.Request) {
463498
err := s.melody.HandleRequest(w, r)
@@ -470,8 +505,26 @@ func (s *Server) AddWebSocket() {
470505
})
471506
})
472507

473-
s.melody.HandleMessage(func(_ *melody.Session, msg []byte) {
508+
// Track connection lifecycle
509+
s.melody.HandleConnect(func(session *melody.Session) {
510+
session.Set("connected_at", time.Now())
511+
wsConnectionsActive.Inc()
512+
})
513+
514+
s.melody.HandleDisconnect(func(session *melody.Session) {
515+
wsConnectionsActive.Dec()
516+
if connectedAt, ok := session.Get("connected_at"); ok {
517+
duration := time.Since(connectedAt.(time.Time))
518+
wsConnectionDuration.Observe(duration.Seconds())
519+
}
520+
})
521+
522+
// Track message metrics
523+
s.melody.HandleMessage(func(session *melody.Session, msg []byte) {
524+
start := time.Now()
525+
wsMessagesReceived.Inc()
474526
s.melody.Broadcast(msg)
527+
wsMessageProcessingDuration.Observe(time.Since(start).Seconds())
475528
})
476529
}
477530

@@ -1573,12 +1626,20 @@ func ViteProxyHandler() http.Handler {
15731626
// (they are defined outside router.Group() blocks with traceInstaller.Install(), except /ws, which is registered inside one and excluded from OTel via excludeWebSocketFromOTel()).
15741627
func isInternalRoute(pattern string) bool {
15751628
switch pattern {
1576-
case "/metrics", "/ready", "/healthz":
1629+
case "/metrics", "/ready", "/healthz", "/ws":
15771630
return true
15781631
}
15791632
return strings.HasPrefix(pattern, "/debug/pprof/")
15801633
}
15811634

1635+
// excludeWebSocketFromOTel returns an otelhttp.Option whose filter rejects requests whose URL path is exactly "/ws", excluding them from tracing and metrics.
1636+
// WebSocket connections are long-lived and would skew HTTP latency data.
1637+
func excludeWebSocketFromOTel() otelhttp.Option {
1638+
return otelhttp.WithFilter(func(r *http.Request) bool {
1639+
return r.URL.Path != "/ws"
1640+
})
1641+
}
1642+
15821643
// HTTPMetricsMiddleware records Prometheus metrics for HTTP requests.
15831644
// It captures request count, duration (histogram), and duration (gauge) with labels
15841645
// for method, path (route pattern), and status code.

0 commit comments

Comments
 (0)