chore: integrate image generation endpoint into main application

PatilHrushikesh · nutanix-Hrushikesh · commit 39aefb12a0b3 · 2025-10-06T14:15:55.000+05:30
This commit wires up the image generation functionality into the main application
and updates test configurations.

Changes include:
- Register image generation processor and metrics in main.go
- Add ImageGenerationTranslator interface and image generation metadata support to translator
- Update test configurations to include image generation endpoint
- Add test cases for image generation functionality
- Update Envoy configuration for image generation routing

These changes complete the integration of the image generation feature
into the AI Gateway application.
diff --git a/cmd/extproc/mainlib/main.go b/cmd/extproc/mainlib/main.go
@@ -230,6 +230,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	}
 	chatCompletionMetrics := metrics.NewChatCompletion(meter, metricsRequestHeaderAttributes)
 	embeddingsMetrics := metrics.NewEmbeddings(meter, metricsRequestHeaderAttributes)
+	imageGenerationMetrics := metrics.NewImageGeneration(meter, metricsRequestHeaderAttributes)
 	mcpMetrics := metrics.NewMCP(meter)
 
 	tracing, err := tracing.NewTracingFromEnv(ctx, os.Stdout, spanRequestHeaderAttributes)
@@ -244,6 +245,7 @@ func Main(ctx context.Context, args []string, stderr io.Writer) (err error) {
 	server.Register(path.Join(flags.rootPrefix, "/v1/chat/completions"), extproc.ChatCompletionProcessorFactory(chatCompletionMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/completions"), extproc.CompletionsProcessorFactory(nil))
 	server.Register(path.Join(flags.rootPrefix, "/v1/embeddings"), extproc.EmbeddingsProcessorFactory(embeddingsMetrics))
+	server.Register(path.Join(flags.rootPrefix, "/v1/images/generations"), extproc.ImageGenerationProcessorFactory(imageGenerationMetrics))
 	server.Register(path.Join(flags.rootPrefix, "/v1/models"), extproc.NewModelsProcessor)
 	server.Register(path.Join(flags.rootPrefix, "/anthropic/v1/messages"), extproc.MessagesProcessorFactory(chatCompletionMetrics))
 
diff --git a/internal/extproc/translator/translator.go b/internal/extproc/translator/translator.go
@@ -215,9 +215,61 @@ type LLMTokenUsage struct {
 	TotalTokens uint32
 }
 
+// ImageGenerationMetadata contains metadata extracted from image generation responses
+// for metrics and observability. It is returned by ImageGenerationTranslator.ResponseBody.
+type ImageGenerationMetadata struct {
+	// ImageCount is the number of images generated in the response.
+	ImageCount int
+	// Model is the AI model used for image generation.
+	Model string
+	// Size is the size/dimensions of the generated images; the string format is not defined here.
+	Size string
+}
+
 // SJSONOptions are the options used for sjson operations in the translator.
 // This is also used outside the package to share the same options for consistency.
 var SJSONOptions = &sjson.Options{
 	Optimistic:     true,
 	ReplaceInPlace: true,
 }
+
+// ImageGenerationTranslator translates the request and response messages between the client and the backend API schemas
+// for the /v1/images/generations endpoint of OpenAI.
+//
+// This is created per request and is not thread-safe.
+type ImageGenerationTranslator interface {
+	// RequestBody translates the request body.
+	// 	- raw is the raw request body.
+	// 	- body is the request body parsed into the [openai.ImageGenerationRequest].
+	//	- forceBodyMutation is true if the translator should always mutate the body, even if no changes are made.
+	//	- This returns headerMutation and bodyMutation that can be nil to indicate no mutation.
+	RequestBody(raw []byte, body *openai.ImageGenerationRequest, forceBodyMutation bool) (
+		headerMutation *extprocv3.HeaderMutation,
+		bodyMutation *extprocv3.BodyMutation,
+		err error,
+	)
+
+	// ResponseHeaders translates the response headers.
+	// 	- headers is the response headers.
+	//	- This returns headerMutation that can be nil to indicate no mutation.
+	ResponseHeaders(headers map[string]string) (
+		headerMutation *extprocv3.HeaderMutation,
+		err error,
+	)
+
+	// ResponseBody translates the response body.
+	// 	- respHeaders is the response headers, and body is the response body; endOfStream presumably marks the final chunk (TODO confirm).
+	//	- This returns headerMutation and bodyMutation that can be nil to indicate no mutation, as well as tokenUsage and imageMetadata.
+	ResponseBody(respHeaders map[string]string, body io.Reader, endOfStream bool) (
+		headerMutation *extprocv3.HeaderMutation,
+		bodyMutation *extprocv3.BodyMutation,
+		tokenUsage LLMTokenUsage,
+		imageMetadata ImageGenerationMetadata,
+		err error,
+	)
+
+	// ResponseError translates the response error. This is called when the upstream response status code is not successful (i.e., not 2xx).
+	// 	- respHeaders is the response headers.
+	// 	- body is the response body that contains the error message.
+	ResponseError(respHeaders map[string]string, body io.Reader) (headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error)
+}
diff --git a/tests/extproc/testupstream_test.go b/tests/extproc/testupstream_test.go
@@ -127,6 +127,30 @@ func TestWithTestUpstream(t *testing.T) {
 		// expResponseBodyFunc is a function to check the response body. This can be used instead of the expResponseBody field.
 		expResponseBodyFunc func(require.TestingT, []byte)
 	}{
+		{
+			name:            "openai - /v1/images/generations",
+			backend:         "openai",
+			path:            "/v1/images/generations",
+			method:          http.MethodPost,
+			requestBody:     `{"model":"dall-e-2","prompt":"a cat wearing sunglasses"}`,
+			expPath:         "/v1/images/generations",
+			responseBody:    `{"created":1736890000,"data":[{"url":"https://example.com/image1.png"}],"model":"dall-e-2","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`,
+			expStatus:       http.StatusOK,
+			expResponseBody: `{"created":1736890000,"data":[{"url":"https://example.com/image1.png"}],"model":"dall-e-2","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`,
+		},
+		{
+			name:            "openai - /v1/images/generations - non json upstream error mapped to OpenAI",
+			backend:         "openai",
+			path:            "/v1/images/generations",
+			method:          http.MethodPost,
+			requestBody:     `{"model":"dall-e-3","prompt":"a scenic beach"}`,
+			expPath:         "/v1/images/generations",
+			responseHeaders: "content-type:text/plain",
+			responseStatus:  strconv.Itoa(http.StatusServiceUnavailable),
+			responseBody:    `backend timeout`,
+			expStatus:       http.StatusServiceUnavailable,
+			expResponseBody: `{"error":{"type":"OpenAIBackendError","message":"backend timeout","code":"503"}}`,
+		},
 		{
 			name:            "unknown path",
 			path:            "/unknown",
diff --git a/tests/extproc/vcr/envoy.yaml b/tests/extproc/vcr/envoy.yaml
@@ -87,6 +87,10 @@ static_resources:
                             path: "/v1/embeddings"
                           route:
                             cluster: openai
+                        - match:
+                            path: "/v1/images/generations"
+                          route:
+                            cluster: openai
                         - match:
                             path: "/v1/models"
                           route: