
Commit 9bdd87e

fix: distinguish /messages endpoint metrics from /chat/completions (#1373)
**Description**

Despite what the documentation says about the "messages" operation, the Anthropic messages processor was wrongly using the chat completions metrics implementation, so the metrics produced by the /messages endpoint were mixed in with the /chat/completions metrics. This fixes that and brings the behavior in line with the documentation.

---------

Signed-off-by: Takeshi Yoneda <[email protected]>
1 parent 86f1a2d commit 9bdd87e
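
In effect, the fix gives the Anthropic `/messages` route its own metrics instance rather than reusing the chat-completions one. A condensed before/after sketch of the wiring in `cmd/extproc/mainlib/main.go` (distilled from the diff below; the other route registrations are omitted):

```go
// Before: /anthropic/v1/messages shared the chat-completions metrics,
// so its series were recorded under gen_ai.operation.name="chat".
chatCompletionMetrics := metrics.NewChatCompletion(meter, metricsRequestHeaderAttributes)
server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"),
	extproc.MessagesProcessorFactory(chatCompletionMetrics))

// After: a dedicated MessagesMetrics instance records the same instruments
// under gen_ai.operation.name="messages", keeping the two endpoints apart.
messagesMetrics := metrics.NewMessages(meter, metricsRequestHeaderAttributes)
server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"),
	extproc.MessagesProcessorFactory(messagesMetrics))
```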

File tree: 6 files changed, +57 -4 lines


cmd/extproc/mainlib/main.go

Lines changed: 2 additions & 1 deletion
@@ -231,6 +231,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 		return fmt.Errorf("failed to create metrics: %w", err)
 	}
 	chatCompletionMetrics := metrics.NewChatCompletion(meter, metricsRequestHeaderAttributes)
+	messagesMetrics := metrics.NewMessages(meter, metricsRequestHeaderAttributes)
 	completionMetrics := metrics.NewCompletion(meter, metricsRequestHeaderAttributes)
 	embeddingsMetrics := metrics.NewEmbeddings(meter, metricsRequestHeaderAttributes)
 	mcpMetrics := metrics.NewMCP(meter, metricsRequestHeaderAttributes)
@@ -248,7 +249,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	server.Register(path.Join(flags.rootPrefix, "/v1/completions"), extproc.CompletionsProcessorFactory(completionMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/embeddings"), extproc.EmbeddingsProcessorFactory(embeddingsMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/models"), extproc.NewModelsProcessor)
-	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(chatCompletionMetrics))
+	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(messagesMetrics))
 
 	if watchErr := extproc.StartConfigWatcher(ctx, flags.configPath, server, l, time.Second*5); watchErr != nil {
 		return fmt.Errorf("failed to start config watcher: %w", watchErr)

internal/extproc/messages_processor.go

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ import (
 //
 // Requests: Only accepts Anthropic format requests.
 // Responses: Returns Anthropic format responses.
-func MessagesProcessorFactory(ccm metrics.ChatCompletionMetrics) ProcessorFactory {
+func MessagesProcessorFactory(ccm metrics.MessagesMetrics) ProcessorFactory {
 	return func(config *processorConfig, requestHeaders map[string]string, logger *slog.Logger, _ tracing.Tracing, isUpstreamFilter bool) (Processor, error) {
 		logger = logger.With("processor", "anthropic-messages", "isUpstreamFilter", fmt.Sprintf("%v", isUpstreamFilter))
 		if !isUpstreamFilter {
@@ -145,7 +145,7 @@ type messagesProcessorUpstreamFilter struct {
 	translator translator.AnthropicMessagesTranslator
 	onRetry    bool
 	stream     bool
-	metrics    metrics.ChatCompletionMetrics
+	metrics    metrics.MessagesMetrics
 	costs      translator.LLMTokenUsage
 }

internal/metrics/genai.go

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ const (
 	genaiOperationChat       = "chat"
 	genaiOperationCompletion = "completion"
 	genaiOperationEmbedding  = "embeddings"
+	genaiOperationMessages   = "messages"
 	genaiProviderOpenAI      = "openai"
 	genaiProviderAWSBedrock  = "aws.bedrock"
 	genaiTokenTypeInput      = "input"

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import "go.opentelemetry.io/otel/metric"
+
+// MessagesMetrics is the interface for the /messages endpoint AI Gateway metrics.
+//
+// Semantically, it is identical to ChatCompletionMetrics, so it embeds that interface.
+//
+// The only difference is that it has the operation name "messages" instead of "chat".
+type MessagesMetrics interface {
+	ChatCompletionMetrics
+}
+
+// NewMessages creates a new x.MessagesMetrics instance.
+func NewMessages(meter metric.Meter, requestHeaderLabelMapping map[string]string) MessagesMetrics {
+	return &chatCompletion{
+		baseMetrics: newBaseMetrics(meter, genaiOperationMessages, requestHeaderLabelMapping),
+	}
+}
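
Because `MessagesMetrics` only embeds `ChatCompletionMetrics`, code written against the chat-completion interface continues to accept the new type. A tiny illustrative sketch (hypothetical helpers, not part of this commit) of what the embedding buys:

```go
package metrics

// recordWithChatSemantics is a hypothetical helper that only needs the
// chat-completion interface.
func recordWithChatSemantics(m ChatCompletionMetrics) {}

// wireUpMessages shows that a MessagesMetrics value satisfies
// ChatCompletionMetrics via embedding, so shared helpers need no changes.
func wireUpMessages(mm MessagesMetrics) {
	recordWithChatSemantics(mm)
}
```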
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/sdk/metric"
+)
+
+func TestNewMessages(t *testing.T) {
+	mr := metric.NewManualReader()
+	meter := metric.NewMeterProvider(metric.WithReader(mr)).Meter("test")
+	pm, ok := NewMessages(meter, nil).(*chatCompletion)
+	require.True(t, ok)
+	require.NotNil(t, pm)
+	require.NotNil(t, pm.baseMetrics)
+	require.Equal(t, genaiOperationMessages, pm.operation)
+}

site/docs/capabilities/observability/metrics.md

Lines changed: 5 additions & 1 deletion
@@ -31,7 +31,11 @@ For example, the Envoy AI Gateway collects metrics such as:
 
 Each metric comes with some default attributes such as:
 
-- `gen_ai.operation.name` - The operation type (`chat`, `completion`, `embeddings`, `messages`)
+- `gen_ai.operation.name`
+  - `chat`: For `/v1/chat/completions` endpoint.
+  - `completion`: For `/v1/completions` endpoint.
+  - `embeddings`: For `/v1/embeddings` endpoint.
+  - `messages`: For `/anthropic/v1/messages` endpoint.
 - `gen_ai.original.model` - The original model name from the request body
 - `gen_ai.request.model` - The model name requested (may be overridden)
 - `gen_ai.response.model` - The model name returned in the response
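
For illustration only, a self-contained sketch using the OpenTelemetry Go SDK directly (with a hypothetical instrument name, not the gateway's internal code) of how a single instrument is split into separate series by `gen_ai.operation.name` — which is why `/anthropic/v1/messages` now records under its own operation value instead of `chat`:

```go
package main

import (
	"context"
	"fmt"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/metric"
	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
	"go.opentelemetry.io/otel/sdk/metric/metricdata"
)

func main() {
	reader := sdkmetric.NewManualReader()
	meter := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)).Meter("example")

	// Hypothetical counter standing in for the gateway's token-usage instrument.
	tokens, _ := meter.Int64Counter("gen_ai.client.token.usage")

	ctx := context.Background()
	// A /v1/chat/completions request and an /anthropic/v1/messages request
	// record into the same instrument but under different operation names.
	tokens.Add(ctx, 42, metric.WithAttributes(attribute.String("gen_ai.operation.name", "chat")))
	tokens.Add(ctx, 17, metric.WithAttributes(attribute.String("gen_ai.operation.name", "messages")))

	var rm metricdata.ResourceMetrics
	_ = reader.Collect(ctx, &rm)
	sum := rm.ScopeMetrics[0].Metrics[0].Data.(metricdata.Sum[int64])
	fmt.Println(len(sum.DataPoints)) // 2: one series per operation name
}
```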
