
Commit 9bdd87e

fix: distinguish /messages endpoint metrics from /chat/completions (#1373)
**Description**

Despite what the documentation says about the "messages" operation, the Anthropic messages processor was wrongly using the chat completions metrics implementation, so the metrics produced by the /messages endpoint were mixed in with the /chat/completions metrics. This fixes that and brings the behavior in line with the documentation.

---------

Signed-off-by: Takeshi Yoneda <[email protected]>
1 parent 86f1a2d commit 9bdd87e
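
In effect, the fix gives the Anthropic `/messages` route its own metrics instance rather than reusing the chat-completions one. A condensed before/after sketch of the wiring in `cmd/extproc/mainlib/main.go` (distilled from the diff below; the other route registrations are omitted):

```go
// Before: /anthropic/v1/messages shared the chat-completions metrics,
// so its series were recorded under gen_ai.operation.name="chat".
chatCompletionMetrics := metrics.NewChatCompletion(meter, metricsRequestHeaderAttributes)
server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"),
	extproc.MessagesProcessorFactory(chatCompletionMetrics))

// After: a dedicated MessagesMetrics instance records the same instruments
// under gen_ai.operation.name="messages", keeping the two endpoints apart.
messagesMetrics := metrics.NewMessages(meter, metricsRequestHeaderAttributes)
server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"),
	extproc.MessagesProcessorFactory(messagesMetrics))
```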

File tree: 6 files changed, +57 -4 lines


cmd/extproc/mainlib/main.go

Lines changed: 2 additions & 1 deletion
@@ -231,6 +231,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 		return fmt.Errorf("failed to create metrics: %w", err)
 	}
 	chatCompletionMetrics := metrics.NewChatCompletion(meter, metricsRequestHeaderAttributes)
+	messagesMetrics := metrics.NewMessages(meter, metricsRequestHeaderAttributes)
 	completionMetrics := metrics.NewCompletion(meter, metricsRequestHeaderAttributes)
 	embeddingsMetrics := metrics.NewEmbeddings(meter, metricsRequestHeaderAttributes)
 	mcpMetrics := metrics.NewMCP(meter, metricsRequestHeaderAttributes)
@@ -248,7 +249,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	server.Register(path.Join(flags.rootPrefix, "/v1/completions"), extproc.CompletionsProcessorFactory(completionMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/embeddings"), extproc.EmbeddingsProcessorFactory(embeddingsMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/models"), extproc.NewModelsProcessor)
-	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(chatCompletionMetrics))
+	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(messagesMetrics))
 
 	if watchErr := extproc.StartConfigWatcher(ctx, flags.configPath, server, l, time.Second*5); watchErr != nil {
 		return fmt.Errorf("failed to start config watcher: %w", watchErr)

internal/extproc/messages_processor.go

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ import (
 //
 // Requests: Only accepts Anthropic format requests.
 // Responses: Returns Anthropic format responses.
-func MessagesProcessorFactory(ccm metrics.ChatCompletionMetrics) ProcessorFactory {
+func MessagesProcessorFactory(ccm metrics.MessagesMetrics) ProcessorFactory {
 	return func(config *processorConfig, requestHeaders map[string]string, logger *slog.Logger, _ tracing.Tracing, isUpstreamFilter bool) (Processor, error) {
 		logger = logger.With("processor", "anthropic-messages", "isUpstreamFilter", fmt.Sprintf("%v", isUpstreamFilter))
 		if !isUpstreamFilter {
@@ -145,7 +145,7 @@ type messagesProcessorUpstreamFilter struct {
 	translator translator.AnthropicMessagesTranslator
 	onRetry    bool
 	stream     bool
-	metrics    metrics.ChatCompletionMetrics
+	metrics    metrics.MessagesMetrics
 	costs      translator.LLMTokenUsage
 }

internal/metrics/genai.go

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ const (
 	genaiOperationChat       = "chat"
 	genaiOperationCompletion = "completion"
 	genaiOperationEmbedding  = "embeddings"
+	genaiOperationMessages   = "messages"
 	genaiProviderOpenAI      = "openai"
 	genaiProviderAWSBedrock  = "aws.bedrock"
 	genaiTokenTypeInput      = "input"

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import "go.opentelemetry.io/otel/metric"
+
+// MessagesMetrics is the interface for the /messages endpoint AI Gateway metrics.
+//
+// Semantically, it is identical to ChatCompletionMetrics, so it embeds that interface.
+//
+// The only difference is that it has the operation name "messages" instead of "chat".
+type MessagesMetrics interface {
+	ChatCompletionMetrics
+}
+
+// NewMessages creates a new x.MessagesMetrics instance.
+func NewMessages(meter metric.Meter, requestHeaderLabelMapping map[string]string) MessagesMetrics {
+	return &chatCompletion{
+		baseMetrics: newBaseMetrics(meter, genaiOperationMessages, requestHeaderLabelMapping),
+	}
+}
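
Because `MessagesMetrics` only embeds `ChatCompletionMetrics`, code written against the chat-completion interface continues to accept the new type. A tiny illustrative sketch (hypothetical helpers, not part of this commit) of what the embedding buys:

```go
package metrics

// recordWithChatSemantics is a hypothetical helper that only needs the
// chat-completion interface.
func recordWithChatSemantics(m ChatCompletionMetrics) {}

// wireUpMessages shows that a MessagesMetrics value satisfies
// ChatCompletionMetrics via embedding, so shared helpers need no changes.
func wireUpMessages(mm MessagesMetrics) {
	recordWithChatSemantics(mm)
}
```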
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/sdk/metric"
+)
+
+func TestNewMessages(t *testing.T) {
+	mr := metric.NewManualReader()
+	meter := metric.NewMeterProvider(metric.WithReader(mr)).Meter("test")
+	pm, ok := NewMessages(meter, nil).(*chatCompletion)
+	require.True(t, ok)
+	require.NotNil(t, pm)
+	require.NotNil(t, pm.baseMetrics)
+	require.Equal(t, genaiOperationMessages, pm.operation)
+}

site/docs/capabilities/observability/metrics.md

Lines changed: 5 additions & 1 deletion
@@ -31,7 +31,11 @@ For example, the Envoy AI Gateway collects metrics such as:
 
 Each metric comes with some default attributes such as:
 
-- `gen_ai.operation.name` - The operation type (`chat`, `completion`, `embeddings`, `messages`)
+- `gen_ai.operation.name`
+  - `chat`: For `/v1/chat/completions` endpoint.
+  - `completion`: For `/v1/completions` endpoint.
+  - `embeddings`: For `/v1/embeddings` endpoint.
+  - `messages`: For `/anthropic/v1/messages` endpoint.
 - `gen_ai.original.model` - The original model name from the request body
 - `gen_ai.request.model` - The model name requested (may be overridden)
 - `gen_ai.response.model` - The model name returned in the response
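
For illustration only, a self-contained sketch using the OpenTelemetry Go SDK directly (with a hypothetical instrument name, not the gateway's internal code) of how a single instrument is split into separate series by `gen_ai.operation.name` — which is why `/anthropic/v1/messages` now records under its own operation value instead of `chat`:

```go
package main

import (
	"context"
	"fmt"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/metric"
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
	"go.opentelemetry.io/otel/sdk/metric/metricdata"
)

func main() {
	reader := sdkmetric.NewManualReader()
	meter := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)).Meter("example")

	// Hypothetical counter standing in for the gateway's token-usage instrument.
	tokens, _ := meter.Int64Counter("gen_ai.client.token.usage")

	ctx := context.Background()
	// A /v1/chat/completions request and an /anthropic/v1/messages request
	// record into the same instrument but under different operation names.
	tokens.Add(ctx, 42, metric.WithAttributes(attribute.String("gen_ai.operation.name", "chat")))
	tokens.Add(ctx, 17, metric.WithAttributes(attribute.String("gen_ai.operation.name", "messages")))

	var rm metricdata.ResourceMetrics
	_ = reader.Collect(ctx, &rm)
	sum := rm.ScopeMetrics[0].Metrics[0].Data.(metricdata.Sum[int64])
	fmt.Println(len(sum.DataPoints)) // 2: one series per operation name
}
```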
