feat(observability): add comprehensive monitoring for image generation

nutanix-Hrushikesh · nutanix-Hrushikesh · commit 19a00f936ea0 · 2025-10-06T16:44:28.000+05:30
- Implement image generation metrics collection and reporting
- Add distributed tracing support for image generation requests
- Integrate OpenInference tracing for OpenAI image gen
- Add API tracing support for image generation endpoints
- Include comprehensive test coverage for observability features

Signed-off-by: Hrushikesh Patil <hrushikesh.patil@nutanix.com>
diff --git a/internal/metrics/image_generation_metrics.go b/internal/metrics/image_generation_metrics.go
@@ -13,6 +13,7 @@ import (
 	"go.opentelemetry.io/otel/metric"
 
 	"github.com/envoyproxy/ai-gateway/internal/filterapi"
+	"github.com/envoyproxy/ai-gateway/internal/internalapi"
 )
 
 // imageGeneration is the implementation for the image generation AI Gateway metrics.
@@ -24,14 +25,16 @@ type imageGeneration struct {
 type ImageGenerationMetrics interface {
 	// StartRequest initializes timing for a new request.
 	StartRequest(headers map[string]string)
-	// SetModel sets the model the request. This is usually called after parsing the request body .
-	SetModel(model string)
+	// SetRequestModel sets the request model name.
+	SetRequestModel(requestModel internalapi.RequestModel)
+	// SetResponseModel sets the response model name.
+	SetResponseModel(responseModel internalapi.ResponseModel)
 	// SetBackend sets the selected backend when the routing decision has been made. This is usually called
 	// after parsing the request body to determine the model and invoke the routing logic.
 	SetBackend(backend *filterapi.Backend)
 
-	// RecordTokenUsage records token usage metrics (for image generation, this will typically be 0).
-	RecordTokenUsage(ctx context.Context, inputTokens, outputTokens, totalTokens uint32, requestHeaderLabelMapping map[string]string)
+	// RecordTokenUsage records token usage metrics (image gen typically 0, but supported).
+	RecordTokenUsage(ctx context.Context, inputTokens, outputTokens uint32, requestHeaderLabelMapping map[string]string)
 	// RecordRequestCompletion records latency metrics for the entire request.
 	RecordRequestCompletion(ctx context.Context, success bool, requestHeaderLabelMapping map[string]string)
 	// RecordImageGeneration records metrics specific to image generation.
@@ -50,13 +53,18 @@ func (i *imageGeneration) StartRequest(headers map[string]string) {
 	i.baseMetrics.StartRequest(headers)
 }
 
-// SetModel sets the model for the request.
-func (i *imageGeneration) SetModel(model string) {
-	i.baseMetrics.SetModel(model, model)
+// SetRequestModel sets the request model for the request.
+func (i *imageGeneration) SetRequestModel(requestModel internalapi.RequestModel) {
+	i.baseMetrics.SetRequestModel(requestModel)
+}
+
+// SetResponseModel sets the response model for the request.
+func (i *imageGeneration) SetResponseModel(responseModel internalapi.ResponseModel) {
+	i.baseMetrics.SetResponseModel(responseModel)
 }
 
 // RecordTokenUsage implements [ImageGeneration.RecordTokenUsage].
-func (i *imageGeneration) RecordTokenUsage(ctx context.Context, inputTokens, outputTokens, totalTokens uint32, requestHeaders map[string]string) {
+func (i *imageGeneration) RecordTokenUsage(ctx context.Context, inputTokens, outputTokens uint32, requestHeaders map[string]string) {
 	attrs := i.buildBaseAttributes(requestHeaders)
 
 	// For image generation, token usage is typically 0, but we still record it for consistency
diff --git a/internal/metrics/image_generation_metrics_test.go b/internal/metrics/image_generation_metrics_test.go
@@ -0,0 +1,113 @@
+// Copyright Envoy AI Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import (
+	"testing"
+	"testing/synctest"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/sdk/metric"
+
+	"github.com/envoyproxy/ai-gateway/internal/filterapi"
+)
+
+func TestImageGeneration_RecordTokenUsage(t *testing.T) {
+	// Mirrors chat/embeddings token usage tests, but for image_generation.
+	var (
+		mr    = metric.NewManualReader()
+		meter = metric.NewMeterProvider(metric.WithReader(mr)).Meter("test")
+		im    = NewImageGeneration(meter, nil).(*imageGeneration)
+
+		attrsBase = []attribute.KeyValue{
+			attribute.Key(genaiAttributeOperationName).String(genaiOperationImageGeneration),
+			attribute.Key(genaiAttributeProviderName).String(genaiProviderOpenAI),
+			attribute.Key(genaiAttributeRequestModel).String("test-model"),
+			attribute.Key(genaiAttributeResponseModel).String("test-model"),
+		}
+		inputAttrs  = attribute.NewSet(append(attrsBase, attribute.Key(genaiAttributeTokenType).String(genaiTokenTypeInput))...)
+		outputAttrs = attribute.NewSet(append(attrsBase, attribute.Key(genaiAttributeTokenType).String(genaiTokenTypeOutput))...)
+	)
+
+	// Set labels and record usage.
+	im.SetModel("test-model", "test-model")
+	im.SetBackend(&filterapi.Backend{Schema: filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI}})
+	im.RecordTokenUsage(t.Context(), 3, 7, nil)
+
+	count, sum := getHistogramValues(t, mr, genaiMetricClientTokenUsage, inputAttrs)
+	assert.Equal(t, uint64(1), count)
+	assert.Equal(t, 3.0, sum)
+
+	count, sum = getHistogramValues(t, mr, genaiMetricClientTokenUsage, outputAttrs)
+	assert.Equal(t, uint64(1), count)
+	assert.Equal(t, 7.0, sum)
+}
+
+func TestImageGeneration_RecordImageGeneration(t *testing.T) {
+	// Use synctest to keep time-based assertions deterministic.
+	synctest.Test(t, func(t *testing.T) {
+		mr := metric.NewManualReader()
+		meter := metric.NewMeterProvider(metric.WithReader(mr)).Meter("test")
+		im := NewImageGeneration(meter, nil).(*imageGeneration)
+
+		// Base attributes plus image-specific ones
+		attrs := attribute.NewSet(
+			attribute.Key(genaiAttributeOperationName).String(genaiOperationImageGeneration),
+			attribute.Key(genaiAttributeProviderName).String(genaiProviderOpenAI),
+			attribute.Key(genaiAttributeRequestModel).String("img-model"),
+			attribute.Key(genaiAttributeResponseModel).String("img-model"),
+			attribute.Key("gen_ai.image.count").Int(2),
+			attribute.Key("gen_ai.image.model").String("img-model"),
+			attribute.Key("gen_ai.image.size").String("1024x1024"),
+		)
+
+		im.StartRequest(nil)
+		im.SetModel("img-model", "img-model")
+		im.SetBackend(&filterapi.Backend{Schema: filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI}})
+
+		time.Sleep(10 * time.Millisecond)
+		im.RecordImageGeneration(t.Context(), 2, "img-model", "1024x1024", nil)
+
+		count, sum := getHistogramValues(t, mr, genaiMetricServerRequestDuration, attrs)
+		assert.Equal(t, uint64(1), count)
+		assert.Equal(t, 10*time.Millisecond.Seconds(), sum)
+	})
+}
+
+func TestImageGeneration_HeaderLabelMapping(t *testing.T) {
+	// Verify header mapping is honored for token usage metrics.
+	var (
+		mr            = metric.NewManualReader()
+		meter         = metric.NewMeterProvider(metric.WithReader(mr)).Meter("test")
+		headerMapping = map[string]string{"x-user-id": "user_id", "x-org-id": "org_id"}
+		im            = NewImageGeneration(meter, headerMapping).(*imageGeneration)
+	)
+
+	requestHeaders := map[string]string{
+		"x-user-id": "user123",
+		"x-org-id":  "org456",
+	}
+
+	im.SetModel("test-model", "test-model")
+	im.SetBackend(&filterapi.Backend{Schema: filterapi.VersionedAPISchema{Name: filterapi.APISchemaOpenAI}})
+	im.RecordTokenUsage(t.Context(), 5, 0, requestHeaders)
+
+	attrs := attribute.NewSet(
+		attribute.Key(genaiAttributeOperationName).String(genaiOperationImageGeneration),
+		attribute.Key(genaiAttributeProviderName).String(genaiProviderOpenAI),
+		attribute.Key(genaiAttributeRequestModel).String("test-model"),
+		attribute.Key(genaiAttributeResponseModel).String("test-model"),
+		attribute.Key(genaiAttributeTokenType).String(genaiTokenTypeInput),
+		attribute.Key("user_id").String("user123"),
+		attribute.Key("org_id").String("org456"),
+	)
+
+	count, _ := getHistogramValues(t, mr, genaiMetricClientTokenUsage, attrs)
+	require.Equal(t, uint64(1), count)
+}
diff --git a/internal/tracing/api/api.go b/internal/tracing/api/api.go
@@ -15,6 +15,7 @@ import (
 	"go.opentelemetry.io/otel/trace"
 
 	"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
+	openaisdk "github.com/openai/openai-go/v2"
 )
 
 var _ Tracing = NoopTracing{}
@@ -184,13 +185,13 @@ type ImageGenerationTracer interface {
 	//   - req: The OpenAI image generation request. Used to record request attributes.
 	//
 	// Returns nil unless the span is sampled.
-	StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, headerMutation *extprocv3.HeaderMutation, req *openai.ImageGenerationRequest, body []byte) ImageGenerationSpan
+	StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, headerMutation *extprocv3.HeaderMutation, req *openaisdk.ImageGenerateParams, body []byte) ImageGenerationSpan
 }
 
 // ImageGenerationSpan represents an OpenAI image generation.
 type ImageGenerationSpan interface {
 	// RecordResponse records the response attributes to the span.
-	RecordResponse(resp *openai.ImageGenerationResponse)
+	RecordResponse(resp *openaisdk.ImagesResponse)
 
 	// EndSpanOnError finalizes and ends the span with an error status.
 	EndSpanOnError(statusCode int, body []byte)
@@ -210,17 +211,17 @@ type ImageGenerationRecorder interface {
 	//
 	// Note: Do not do any expensive data conversions as the span might not be
 	// sampled.
-	StartParams(req *openai.ImageGenerationRequest, body []byte) (spanName string, opts []trace.SpanStartOption)
+	StartParams(req *openaisdk.ImageGenerateParams, body []byte) (spanName string, opts []trace.SpanStartOption)
 
 	// RecordRequest records request attributes to the span.
 	//
 	// Parameters:
 	//   - req: contains the image generation request
 	//   - body: contains the complete request body.
-	RecordRequest(span trace.Span, req *openai.ImageGenerationRequest, body []byte)
+	RecordRequest(span trace.Span, req *openaisdk.ImageGenerateParams, body []byte)
 
 	// RecordResponse records response attributes to the span.
-	RecordResponse(span trace.Span, resp *openai.ImageGenerationResponse)
+	RecordResponse(span trace.Span, resp *openaisdk.ImagesResponse)
 
 	// RecordResponseOnError ends recording the span with an error status.
 	RecordResponseOnError(span trace.Span, statusCode int, body []byte)
@@ -257,7 +258,7 @@ type EmbeddingsRecorder interface {
 type NoopImageGenerationTracer struct{}
 
 // StartSpanAndInjectHeaders implements ImageGenerationTracer.StartSpanAndInjectHeaders.
-func (NoopImageGenerationTracer) StartSpanAndInjectHeaders(context.Context, map[string]string, *extprocv3.HeaderMutation, *openai.ImageGenerationRequest, []byte) ImageGenerationSpan {
+func (NoopImageGenerationTracer) StartSpanAndInjectHeaders(context.Context, map[string]string, *extprocv3.HeaderMutation, *openaisdk.ImageGenerateParams, []byte) ImageGenerationSpan {
 	return nil
 }
 
diff --git a/internal/tracing/image_generation_span.go b/internal/tracing/image_generation_span.go
@@ -8,8 +8,8 @@ package tracing
 import (
 	"go.opentelemetry.io/otel/trace"
 
-	"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
 	tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"
+	openaisdk "github.com/openai/openai-go/v2"
 )
 
 // Ensure imageGenerationSpan implements ImageGenerationSpan.
@@ -21,7 +21,7 @@ type imageGenerationSpan struct {
 }
 
 // RecordResponse invokes [tracing.ImageGenerationRecorder.RecordResponse].
-func (s *imageGenerationSpan) RecordResponse(resp *openai.ImageGenerationResponse) {
+func (s *imageGenerationSpan) RecordResponse(resp *openaisdk.ImagesResponse) {
 	s.recorder.RecordResponse(s.span, resp)
 }
 
diff --git a/internal/tracing/image_generation_tracer.go b/internal/tracing/image_generation_tracer.go
@@ -13,8 +13,8 @@ import (
 	"go.opentelemetry.io/otel/trace"
 	"go.opentelemetry.io/otel/trace/noop"
 
-	"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
 	tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"
+	openaisdk "github.com/openai/openai-go/v2"
 )
 
 // Ensure imageGenerationTracer implements ImageGenerationTracer.
@@ -39,7 +39,7 @@ type imageGenerationTracer struct {
 }
 
 // StartSpanAndInjectHeaders implements ImageGenerationTracer.StartSpanAndInjectHeaders.
-func (t *imageGenerationTracer) StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, mutableHeaders *extprocv3.HeaderMutation, req *openai.ImageGenerationRequest, body []byte) tracing.ImageGenerationSpan {
+func (t *imageGenerationTracer) StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, mutableHeaders *extprocv3.HeaderMutation, req *openaisdk.ImageGenerateParams, body []byte) tracing.ImageGenerationSpan {
 	// Extract trace context from incoming headers.
 	parentCtx := t.propagator.Extract(ctx, propagation.MapCarrier(headers))
 
diff --git a/internal/tracing/openinference/openai/image_generation.go b/internal/tracing/openinference/openai/image_generation.go
@@ -14,9 +14,9 @@ import (
 	"go.opentelemetry.io/otel/codes"
 	"go.opentelemetry.io/otel/trace"
 
-	"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
 	tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"
 	"github.com/envoyproxy/ai-gateway/internal/tracing/openinference"
+	openaisdk "github.com/openai/openai-go/v2"
 )
 
 // ImageGenerationRecorder implements recorders for OpenInference image generation spans.
@@ -51,17 +51,17 @@ func NewImageGenerationRecorder(config *openinference.TraceConfig) tracing.Image
 var imageGenStartOpts = []trace.SpanStartOption{trace.WithSpanKind(trace.SpanKindInternal)}
 
 // StartParams implements the same method as defined in tracing.ImageGenerationRecorder.
-func (r *ImageGenerationRecorder) StartParams(*openai.ImageGenerationRequest, []byte) (spanName string, opts []trace.SpanStartOption) {
+func (r *ImageGenerationRecorder) StartParams(*openaisdk.ImageGenerateParams, []byte) (spanName string, opts []trace.SpanStartOption) {
 	return "ImageGeneration", imageGenStartOpts
 }
 
 // RecordRequest implements the same method as defined in tracing.ImageGenerationRecorder.
-func (r *ImageGenerationRecorder) RecordRequest(span trace.Span, req *openai.ImageGenerationRequest, body []byte) {
+func (r *ImageGenerationRecorder) RecordRequest(span trace.Span, req *openaisdk.ImageGenerateParams, body []byte) {
 	span.SetAttributes(buildImageGenerationRequestAttributes(req, string(body), r.traceConfig)...)
 }
 
 // RecordResponse implements the same method as defined in tracing.ImageGenerationRecorder.
-func (r *ImageGenerationRecorder) RecordResponse(span trace.Span, resp *openai.ImageGenerationResponse) {
+func (r *ImageGenerationRecorder) RecordResponse(span trace.Span, resp *openaisdk.ImagesResponse) {
 	// Set output attributes.
 	var attrs []attribute.KeyValue
 	attrs = buildImageGenerationResponseAttributes(resp, r.traceConfig)
@@ -84,11 +84,11 @@ func (r *ImageGenerationRecorder) RecordResponseOnError(span trace.Span, statusC
 }
 
 // buildImageGenerationRequestAttributes builds OpenInference attributes from the image generation request.
-func buildImageGenerationRequestAttributes(req *openai.ImageGenerationRequest, body string, config *openinference.TraceConfig) []attribute.KeyValue {
+func buildImageGenerationRequestAttributes(req *openaisdk.ImageGenerateParams, body string, config *openinference.TraceConfig) []attribute.KeyValue {
 	attrs := []attribute.KeyValue{
 		attribute.String(openinference.SpanKind, openinference.SpanKindLLM),
 		attribute.String(openinference.LLMSystem, openinference.LLMSystemOpenAI),
-		attribute.String(openinference.LLMModelName, req.Model),
+		attribute.String(openinference.LLMModelName, string(req.Model)),
 	}
 
 	if config.HideInputs {
@@ -101,34 +101,31 @@ func buildImageGenerationRequestAttributes(req *openai.ImageGenerationRequest, b
 	// Add image generation specific attributes
 	attrs = append(attrs, attribute.String("gen_ai.operation.name", "image_generation"))
 	attrs = append(attrs, attribute.String("gen_ai.image.prompt", req.Prompt))
-	attrs = append(attrs, attribute.String("gen_ai.image.size", req.Size))
-	attrs = append(attrs, attribute.String("gen_ai.image.quality", req.Quality))
-	attrs = append(attrs, attribute.String("gen_ai.image.style", req.Style))
-	attrs = append(attrs, attribute.String("gen_ai.image.response_format", req.ResponseFormat))
-	if req.N != nil {
-		attrs = append(attrs, attribute.Int("gen_ai.image.n", *req.N))
+	attrs = append(attrs, attribute.String("gen_ai.image.size", string(req.Size)))
+	attrs = append(attrs, attribute.String("gen_ai.image.quality", string(req.Quality)))
+	attrs = append(attrs, attribute.String("gen_ai.image.response_format", string(req.ResponseFormat)))
+	if req.N.Valid() {
+		attrs = append(attrs, attribute.Int("gen_ai.image.n", int(req.N.Value)))
 	}
 
 	return attrs
 }
 
 // buildImageGenerationResponseAttributes builds OpenInference attributes from the image generation response.
-func buildImageGenerationResponseAttributes(resp *openai.ImageGenerationResponse, config *openinference.TraceConfig) []attribute.KeyValue {
+func buildImageGenerationResponseAttributes(resp *openaisdk.ImagesResponse, config *openinference.TraceConfig) []attribute.KeyValue {
 	attrs := []attribute.KeyValue{
-		attribute.String("gen_ai.response.model", resp.Model),
 		attribute.Int("gen_ai.image.count", len(resp.Data)),
 	}
 
-	// Add image URLs if not hidden
+	// Add image URLs if not hidden (SDK uses string field for URL)
 	if !config.HideOutputs && resp.Data != nil {
 		urls := make([]string, 0, len(resp.Data))
 		for _, data := range resp.Data {
-			if data.URL != nil {
-				urls = append(urls, *data.URL)
+			if data.URL != "" {
+				urls = append(urls, data.URL)
 			}
 		}
 		if len(urls) > 0 {
-			// Join URLs with comma for attribute storage
 			urlStr := ""
 			for i, url := range urls {
 				if i > 0 {
diff --git a/internal/tracing/openinference/openai/image_generation_test.go b/internal/tracing/openinference/openai/image_generation_test.go

Original file line number	Diff line number	Diff line change
`@@ -8,8 +8,8 @@ package tracing`
`8`	`8`	`import (`
`9`	`9`	`"go.opentelemetry.io/otel/trace"`
`10`	`10`
`11`		`- "github.com/envoyproxy/ai-gateway/internal/apischema/openai"`
`12`	`11`	`tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"`
	`12`	`+ openaisdk "github.com/openai/openai-go/v2"`
`13`	`13`	`)`
`14`	`14`
`15`	`15`	`// Ensure imageGenerationSpan implements ImageGenerationSpan.`
`@@ -21,7 +21,7 @@ type imageGenerationSpan struct {`
`21`	`21`	`}`
`22`	`22`
`23`	`23`	`// RecordResponse invokes [tracing.ImageGenerationRecorder.RecordResponse].`
`24`		`-func (s *imageGenerationSpan) RecordResponse(resp *openai.ImageGenerationResponse) {`
	`24`	`+func (s *imageGenerationSpan) RecordResponse(resp *openaisdk.ImagesResponse) {`
`25`	`25`	`s.recorder.RecordResponse(s.span, resp)`
`26`	`26`	`}`
`27`	`27`
Original file line number	Diff line number	Diff line change
`@@ -13,8 +13,8 @@ import (`
`13`	`13`	`"go.opentelemetry.io/otel/trace"`
`14`	`14`	`"go.opentelemetry.io/otel/trace/noop"`
`15`	`15`
`16`		`- "github.com/envoyproxy/ai-gateway/internal/apischema/openai"`
`17`	`16`	`tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"`
	`17`	`+ openaisdk "github.com/openai/openai-go/v2"`
`18`	`18`	`)`
`19`	`19`
`20`	`20`	`// Ensure imageGenerationTracer implements ImageGenerationTracer.`
`@@ -39,7 +39,7 @@ type imageGenerationTracer struct {`
`39`	`39`	`}`
`40`	`40`
`41`	`41`	`// StartSpanAndInjectHeaders implements ImageGenerationTracer.StartSpanAndInjectHeaders.`
`42`		`-func (t *imageGenerationTracer) StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, mutableHeaders *extprocv3.HeaderMutation, req *openai.ImageGenerationRequest, body []byte) tracing.ImageGenerationSpan {`
	`42`	`+func (t *imageGenerationTracer) StartSpanAndInjectHeaders(ctx context.Context, headers map[string]string, mutableHeaders *extprocv3.HeaderMutation, req *openaisdk.ImageGenerateParams, body []byte) tracing.ImageGenerationSpan {`
`43`	`43`	`// Extract trace context from incoming headers.`
`44`	`44`	`parentCtx := t.propagator.Extract(ctx, propagation.MapCarrier(headers))`
`45`	`45`