Skip to content

Commit d7214b8

Browse files
committed
Avoid telemetry failures interfering with MCP responses
Server was crashing with 404 errors when writing to telemetry. Closes: #2114
1 parent ad59fdc commit d7214b8

File tree

1 file changed

+32
-3
lines changed

1 file changed

+32
-3
lines changed

pkg/telemetry/middleware.go

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,13 @@ func NewHTTPMiddleware(
9393
// to leverage the parsed MCP data.
9494
func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
9595
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
96+
// Ultimate safety net - telemetry must NEVER crash the service
97+
defer func() {
98+
if rec := recover(); rec != nil {
99+
logger.Errorf("Telemetry middleware panic (non-fatal): %v", rec)
100+
}
101+
}()
102+
96103
ctx := r.Context()
97104

98105
// Handle SSE endpoints specially - they are long-lived connections
@@ -123,7 +130,15 @@ func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
123130
// Create span name based on MCP method if available, otherwise use HTTP method + path
124131
spanName := m.createSpanName(ctx, r)
125132
ctx, span := m.tracer.Start(ctx, spanName, trace.WithSpanKind(trace.SpanKindServer))
126-
defer span.End()
133+
// End span with error handling - this is where OTLP export happens
134+
defer func() {
135+
defer func() {
136+
if rec := recover(); rec != nil {
137+
logger.Debugf("Telemetry span.End() panic (non-fatal): %v", rec)
138+
}
139+
}()
140+
span.End()
141+
}()
127142

128143
// Create a response writer wrapper to capture response details
129144
rw := &responseWriter{
@@ -144,7 +159,7 @@ func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
144159
// Record request start time
145160
startTime := time.Now()
146161

147-
// Call the next handler with the instrumented context
162+
// Call the next handler with the wrapped response writer to capture details
148163
next.ServeHTTP(rw, r.WithContext(ctx))
149164

150165
// Record completion metrics and finalize span
@@ -405,9 +420,16 @@ type responseWriter struct {
405420
bytesWritten int64
406421
}
407422

408-
// WriteHeader captures the status code.
423+
// WriteHeader captures the status code with panic protection.
409424
func (rw *responseWriter) WriteHeader(statusCode int) {
410425
rw.statusCode = statusCode
426+
427+
// Wrap the actual WriteHeader call to catch any panics (including duplicate calls)
428+
defer func() {
429+
if rec := recover(); rec != nil {
430+
logger.Debugf("WriteHeader panic recovered (non-fatal): %v", rec)
431+
}
432+
}()
411433
rw.ResponseWriter.WriteHeader(statusCode)
412434
}
413435

@@ -418,6 +440,13 @@ func (rw *responseWriter) Write(data []byte) (int, error) {
418440
return n, err
419441
}
420442

443+
// Flush implements http.Flusher if the underlying ResponseWriter supports it.
444+
func (rw *responseWriter) Flush() {
445+
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
446+
flusher.Flush()
447+
}
448+
}
449+
421450
// recordMetrics records request metrics.
422451
func (m *HTTPMiddleware) recordMetrics(ctx context.Context, r *http.Request, rw *responseWriter, duration time.Duration) {
423452
// Get MCP method from context if available

0 commit comments

Comments
 (0)