Skip to content

Commit 9e05fc7

Browse files
committed
Prevent telemetry failures from interfering with MCP responses
The server was crashing with 404 errors when writing to telemetry. This change adds panic recovery around the telemetry calls so that such failures are tolerated instead of being fatal. It also implements the Flush method, which is essential for SSE/streaming. Closes: #2114
1 parent 81e6666 commit 9e05fc7

File tree

1 file changed

+31
-2
lines changed

1 file changed

+31
-2
lines changed

pkg/telemetry/middleware.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,13 @@ func NewHTTPMiddleware(
9393
// to leverage the parsed MCP data.
9494
func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
9595
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
96+
// Ultimate safety net - telemetry must NEVER crash the service
97+
defer func() {
98+
if rec := recover(); rec != nil {
99+
logger.Errorf("Telemetry middleware panic (non-fatal): %v", rec)
100+
}
101+
}()
102+
96103
ctx := r.Context()
97104

98105
// Handle SSE endpoints specially - they are long-lived connections
@@ -123,7 +130,15 @@ func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
123130
// Create span name based on MCP method if available, otherwise use HTTP method + path
124131
spanName := m.createSpanName(ctx, r)
125132
ctx, span := m.tracer.Start(ctx, spanName, trace.WithSpanKind(trace.SpanKindServer))
126-
defer span.End()
133+
// End span with error handling - this is where OTLP export happens
134+
defer func() {
135+
defer func() {
136+
if rec := recover(); rec != nil {
137+
logger.Debugf("Telemetry span.End() panic (non-fatal): %v", rec)
138+
}
139+
}()
140+
span.End()
141+
}()
127142

128143
// Create a response writer wrapper to capture response details
129144
rw := &responseWriter{
@@ -405,9 +420,16 @@ type responseWriter struct {
405420
bytesWritten int64
406421
}
407422

408-
// WriteHeader captures the status code.
423+
// WriteHeader records statusCode on the wrapper, then forwards it to the wrapped ResponseWriter; a panic raised by that forwarded call is recovered and logged at debug level instead of propagating.
409424
func (rw *responseWriter) WriteHeader(statusCode int) {
410425
rw.statusCode = statusCode
426+
427+
// Registered after statusCode is stored, so the code is captured even if the wrapped WriteHeader below panics. NOTE(review): the original comment cited duplicate calls as a panic source; confirm which wrapped writers actually panic in that case.
428+
defer func() {
429+
if rec := recover(); rec != nil {
430+
logger.Debugf("WriteHeader panic recovered (non-fatal): %v", rec)
431+
}
432+
}()
411433
rw.ResponseWriter.WriteHeader(statusCode)
412434
}
413435

@@ -418,6 +440,13 @@ func (rw *responseWriter) Write(data []byte) (int, error) {
418440
return n, err
419441
}
420442

443+
// Flush implements http.Flusher by delegating to the wrapped ResponseWriter when that writer is itself an http.Flusher; otherwise it does nothing.
444+
func (rw *responseWriter) Flush() {
445+
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
446+
flusher.Flush()
447+
}
448+
}
449+
421450
// recordMetrics records request metrics.
422451
func (m *HTTPMiddleware) recordMetrics(ctx context.Context, r *http.Request, rw *responseWriter, duration time.Duration) {
423452
// Get MCP method from context if available

0 commit comments

Comments
 (0)