stacklok · Deepam02 · Oct 4, 2025 · Oct 4, 2025 · Oct 4, 2025 · blkt
diff --git a/pkg/telemetry/integration_test.go b/pkg/telemetry/integration_test.go
@@ -483,3 +483,44 @@ func TestTelemetryIntegration_MultipleRequests(t *testing.T) {
 	assert.Contains(t, metricsBody, "toolhive_mcp_requests")
 	assert.Contains(t, metricsBody, `server="multi-test"`)
 }
+
+func TestTelemetryIntegration_ToolErrorDetection(t *testing.T) {
+	t.Parallel()
+	// Setup test providers
+	exporter := tracetest.NewInMemoryExporter()
+	tracerProvider := sdktrace.NewTracerProvider(sdktrace.WithSyncer(exporter))
+	meterProvider := sdkmetric.NewMeterProvider()
+
+	config := Config{ServiceName: "test", ServiceVersion: "1.0.0"}
+	middleware := NewHTTPMiddleware(config, tracerProvider, meterProvider, "test", "stdio")
+
+	// Test tool call with error
+	testHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Write([]byte(`{"result":{"isError":true}}`))
+	})
+
+	mcpRequest := &mcp.ParsedMCPRequest{Method: "tools/call", ID: "test", IsRequest: true}
+	req := httptest.NewRequest("POST", "/messages", nil)
+	ctx := context.WithValue(req.Context(), mcp.MCPRequestContextKey, mcpRequest)
+	req = req.WithContext(ctx)
+
+	rec := httptest.NewRecorder()
+	middleware(testHandler).ServeHTTP(rec, req)
+
+	// Verify span has error attribute
+	tracerProvider.ForceFlush(ctx)
+	spans := exporter.GetSpans()
+	require.Len(t, spans, 1)
+
+	span := spans[0]
+	assert.Equal(t, "mcp.tools/call", span.Name)
+
+	// Check for tool error attribute
+	for _, attr := range span.Attributes {
+		if attr.Key == "mcp.tool.error" {
+			assert.True(t, attr.Value.AsBool())
+			return
+		}
+	}
+	t.Error("Expected mcp.tool.error attribute not found")
+}
diff --git a/pkg/telemetry/middleware.go b/pkg/telemetry/middleware.go
@@ -1,6 +1,7 @@
 package telemetry
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -130,6 +131,7 @@ func (m *HTTPMiddleware) Handler(next http.Handler) http.Handler {
 			ResponseWriter: w,
 			statusCode:     http.StatusOK,
 			bytesWritten:   0,
+			isToolCall:     mcpparser.GetMCPMethod(ctx) == string(mcp.MethodToolsCall),
 		}
 
 		// Add HTTP attributes
@@ -390,19 +392,36 @@ func (*HTTPMiddleware) finalizeSpan(span trace.Span, rw *responseWriter, duratio
 		attribute.Float64("http.duration_ms", float64(duration.Nanoseconds())/1e6),
 	)
 
-	// Set span status based on HTTP status code
+	// Add MCP tool error indicator if detected
+	if rw.isToolCall {
+		span.SetAttributes(attribute.Bool("mcp.tool.error", rw.hasToolError))
+	}
+
+	// Set span status based on HTTP status code AND MCP tool errors
 	if rw.statusCode >= 400 {
 		span.SetStatus(codes.Error, fmt.Sprintf("HTTP %d", rw.statusCode))
+	} else if rw.hasToolError {
+		span.SetStatus(codes.Error, "MCP tool execution error")
 	} else {
 		span.SetStatus(codes.Ok, "")
 	}
 }
 
+// detectMCPToolError performs lightweight detection of MCP tool execution errors
+// Returns true if the response likely contains a tool execution error
+func detectMCPToolError(data []byte) bool {
+	// Quick byte search for common error patterns
+	return bytes.Contains(data, []byte(`"isError":true`)) ||
+		bytes.Contains(data, []byte(`"isError": true`))
+}
+
 // responseWriter wraps http.ResponseWriter to capture response details.
 type responseWriter struct {
 	http.ResponseWriter
 	statusCode   int
 	bytesWritten int64
+	hasToolError bool // tracks if MCP tool execution error is detected
+	isToolCall   bool // tracks if this is a tools/call request
 }
 
 // WriteHeader captures the status code.
@@ -411,10 +430,16 @@ func (rw *responseWriter) WriteHeader(statusCode int) {
 	rw.ResponseWriter.WriteHeader(statusCode)
 }
 
-// Write captures the number of bytes written.
+// Write captures the number of bytes written and detects tool errors.
 func (rw *responseWriter) Write(data []byte) (int, error) {
 	n, err := rw.ResponseWriter.Write(data)
 	rw.bytesWritten += int64(n)
+
+	// Only check for tool errors if this is a tool call and we haven't found one yet
+	if rw.isToolCall && !rw.hasToolError {
+		rw.hasToolError = detectMCPToolError(data)
+	}
+
 	return n, err
 }
 
@@ -426,10 +451,12 @@ func (m *HTTPMiddleware) recordMetrics(ctx context.Context, r *http.Request, rw
 		mcpMethod = "unknown"
 	}
 
-	// Determine status (success/error)
+	// Determine status (success/error/tool_error)
 	status := "success"
 	if rw.statusCode >= 400 {
 		status = "error"
+	} else if rw.hasToolError {
+		status = "tool_error"
 	}
 
 	// Common attributes for all metrics

diff --git a/pkg/telemetry/middleware_test.go b/pkg/telemetry/middleware_test.go
@@ -1491,3 +1491,30 @@ func TestFactoryMiddleware_Integration(t *testing.T) {
 		assert.NoError(t, err)
 	})
 }
+
+func TestDetectMCPToolError(t *testing.T) {
+	t.Parallel()
+	assert.False(t, detectMCPToolError([]byte(`{"result":{"isError":false}}`)))
+	assert.True(t, detectMCPToolError([]byte(`{"result":{"isError":true}}`)))
+	assert.False(t, detectMCPToolError([]byte(`{"result":{"content":"test"}}`)))
+}
+
+func TestResponseWriter_ToolErrorDetection(t *testing.T) {
+	t.Parallel()
+	rec := httptest.NewRecorder()
+
+	// Tool call with error
+	rw := &responseWriter{ResponseWriter: rec, isToolCall: true}
+	rw.Write([]byte(`{"result":{"isError":true}}`))
+	assert.True(t, rw.hasToolError)
+
+	// Tool call without error
+	rw = &responseWriter{ResponseWriter: rec, isToolCall: true}
+	rw.Write([]byte(`{"result":{"isError":false}}`))
+	assert.False(t, rw.hasToolError)
+
+	// Non-tool call should not detect errors
+	rw = &responseWriter{ResponseWriter: rec, isToolCall: false}
+	rw.Write([]byte(`{"result":{"isError":true}}`))
+	assert.False(t, rw.hasToolError)
+}