From cbbbf374b6362bd4e21e426297be9d7dca303af1 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 13 Jun 2025 14:07:22 +0200
Subject: [PATCH 01/10] feat: validate request body for embeddings requests

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/util.go | 61 +++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/pkg/plugins/gateway/util.go b/pkg/plugins/gateway/util.go
index 0e57ea540..1e470a0d6 100644
--- a/pkg/plugins/gateway/util.go
+++ b/pkg/plugins/gateway/util.go
@@ -24,6 +24,7 @@ import (
 	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/packages/param"
 	"github.com/vllm-project/aibrix/pkg/utils"
 	"k8s.io/klog/v2"
 )
@@ -69,6 +70,24 @@ func validateRequestBody(requestID, requestPath string, requestBody []byte, user
 		}
 		model = completionObj.Model
 		message = completionObj.Prompt
+	} else if requestPath == "/v1/embeddings" {
+		message = "" // prefix_cache algorithms are not relevant for embeddings
+		var jsonMap map[string]json.RawMessage
+		if err := json.Unmarshal(requestBody, &jsonMap); err != nil {
+			klog.ErrorS(err, "error to unmarshal request body", "requestID", requestID, "requestBody", string(requestBody))
+			errRes = buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "error processing request body", HeaderErrorRequestBodyProcessing, "true")
+			return
+		}
+		embeddingObj := openai.EmbeddingNewParams{}
+		if err := json.Unmarshal(requestBody, &embeddingObj); err != nil {
+			klog.ErrorS(err, "error to unmarshal embeddings object", "requestID", requestID, "requestBody", string(requestBody))
+			errRes = buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "error processing request body", HeaderErrorRequestBodyProcessing, "true")
+			return
+		}
+		model = embeddingObj.Model
+		if errRes = checkEmbeddingInputSequenceLen(requestID, embeddingObj); errRes != nil {
+			return
+		}
 	} else {
 		errRes = buildErrorResponse(envoyTypePb.StatusCode_NotImplemented, "unknown request path", HeaderErrorRequestBodyProcessing, "true")
 		return
@@ -142,6 +161,48 @@ func getChatCompletionsMessage(requestID string, chatCompletionObj openai.ChatCo
 	return builder.String(), nil
 }
 
+// getEmbeddingsInputLen returns the len of the embeddings object
+func checkEmbeddingInputSequenceLen(requestID string, embeddingObj openai.EmbeddingNewParams) *extProcPb.ProcessingResponse {
+	inputParam := embeddingObj.Input
+	var size int
+	switch input := embeddingNewParamsInputUnionAsAny(&inputParam).(type) {
+	case *string:
+		size = len(*input)
+	case *[]string:
+		size = len(*input)
+	case *[]int64:
+		size = len(*input)
+	case *[][]int64:
+		size = len(*input)
+	default:
+	}
+
+	if size == 0 {
+		klog.ErrorS(nil, "no input in the request body", "requestID", requestID)
+		return buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "no messages in the request body", HeaderErrorRequestBodyProcessing, "true")
+	}
+	if size > 1024 {
+		klog.ErrorS(nil, "embeddings content is too large", "requestID", requestID, "size", size)
+		return buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "embeddings content is too large", HeaderErrorRequestBodyProcessing, "true")
+	}
+
+	return nil
+}
+
+// TODO: make asAny method publicly available on OpenAI go
+func embeddingNewParamsInputUnionAsAny(u *openai.EmbeddingNewParamsInputUnion) any {
+	if !param.IsOmitted(u.OfString) {
+		return &u.OfString.Value
+	} else if !param.IsOmitted(u.OfArrayOfStrings) {
+		return &u.OfArrayOfStrings
+	} else if !param.IsOmitted(u.OfArrayOfTokens) {
+		return &u.OfArrayOfTokens
+	} else if !param.IsOmitted(u.OfArrayOfTokenArrays) {
+		return &u.OfArrayOfTokenArrays
+	}
+	return nil
+}
+
 // generateErrorResponse construct envoy proxy error response
 // deprecated: use buildErrorResponse
 func generateErrorResponse(statusCode envoyTypePb.StatusCode, headers []*configPb.HeaderValueOption, body string) *extProcPb.ProcessingResponse {
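
A standalone sketch (not part of the series) of how the new union helper behaves for the four accepted input shapes, assuming the openai-go version pinned by the repo; the model name and the package main scaffolding are illustrative only:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/openai/openai-go"
	"github.com/openai/openai-go/packages/param"
)

// inputSize mirrors the switch in checkEmbeddingInputSequenceLen: a string
// input reports its character count, array inputs report their element count.
func inputSize(u *openai.EmbeddingNewParamsInputUnion) int {
	switch {
	case !param.IsOmitted(u.OfString):
		return len(u.OfString.Value)
	case !param.IsOmitted(u.OfArrayOfStrings):
		return len(u.OfArrayOfStrings)
	case !param.IsOmitted(u.OfArrayOfTokens):
		return len(u.OfArrayOfTokens)
	case !param.IsOmitted(u.OfArrayOfTokenArrays):
		return len(u.OfArrayOfTokenArrays)
	}
	return 0
}

func main() {
	for _, body := range []string{
		`{"model": "some-embedding-model", "input": "hello world"}`,   // single string
		`{"model": "some-embedding-model", "input": ["a", "b", "c"]}`, // array of strings
		`{"model": "some-embedding-model", "input": [1, 2, 3]}`,      // single tokenized input
		`{"model": "some-embedding-model", "input": [[1, 2], [3]]}`,  // array of tokenized inputs
	} {
		var req openai.EmbeddingNewParams
		if err := json.Unmarshal([]byte(body), &req); err != nil {
			fmt.Println("unmarshal error:", err)
			continue
		}
		fmt.Printf("model=%v size=%d\n", req.Model, inputSize(&req.Input))
	}
}
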
From 8e85c0352218b93cbac4b1d625fef673347eff06 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 13 Jun 2025 14:10:30 +0200
Subject: [PATCH 02/10] feat: add v1/embeddings in created httpRoute

Signed-off-by: Guillaume Calmettes
---
 pkg/controller/modelrouter/modelrouter_controller.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index 1e6e10611..75451b188 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -227,6 +227,15 @@ func (m *ModelRouter) createHTTPRoute(namespace string, labels map[string]string
 							modelHeaderMatch,
 						},
 					},
+					{
+						Path: &gatewayv1.HTTPPathMatch{
+							Type:  ptr.To(gatewayv1.PathMatchPathPrefix),
+							Value: ptr.To("/v1/embeddings"),
+						},
+						Headers: []gatewayv1.HTTPHeaderMatch{
+							modelHeaderMatch,
+						},
+					},
 				},
 				BackendRefs: []gatewayv1.HTTPBackendRef{
 					{
From ef82afaae58850fbf3ba060af17580b3fa5cbc43 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 13 Jun 2025 14:49:08 +0200
Subject: [PATCH 03/10] feat: dynamically generate httpRouteMatch based on parsed labels

Signed-off-by: Guillaume Calmettes
---
 .../modelrouter/modelrouter_controller.go     | 79 +++++++++++--------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index 75451b188..59818a378 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -19,7 +19,9 @@ package modelrouter
 import (
 	"context"
 	"fmt"
+	"slices"
 	"strconv"
+	"strings"
 
 	appsv1 "k8s.io/api/apps/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -41,9 +43,12 @@ import (
 
 const (
 	// TODO (varun): cleanup model related identifiers and establish common consensus
-	modelHeaderIdentifier = "model"
-	modelIdentifier       = "model.aibrix.ai/name"
-	modelPortIdentifier   = "model.aibrix.ai/port"
+	modelHeaderIdentifier          = "model"
+	modelIdentifier                = "model.aibrix.ai/name"
+	modelPortIdentifier            = "model.aibrix.ai/port"
+	modelSupportedRoutesIdentifier = "model.aibrix.ai/supported-routes"
+	modelEmbeddingsRoute           = "embeddings"
+	modelChatCompletionsRoute      = "chat-completions"
 	// TODO (varun): parameterize it or dynamically resolve it
 	aibrixEnvoyGateway          = "aibrix-eg"
 	aibrixEnvoyGatewayNamespace = "aibrix-system"
@@ -107,6 +112,42 @@ func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error {
 	return err
 }
 
+func getSupportedRoutesMatchFromLabelsOrDefault(labels map[string]string, modelHeaderMatch gatewayv1.HTTPHeaderMatch) []gatewayv1.HTTPRouteMatch {
+	nameToRoutePathPrefix := map[string][]string{
+		modelEmbeddingsRoute:      {"/v1/embeddings"},
+		modelChatCompletionsRoute: {"/v1/completions", "/v1/chat/completions"},
+	}
+
+	var pathPrefixes []string
+	if routesLabelValue, ok := labels[modelSupportedRoutesIdentifier]; ok {
+		routes := strings.Split(routesLabelValue, ",")
+		for k, route := range nameToRoutePathPrefix {
+			if slices.Contains(routes, k) {
+				pathPrefixes = append(pathPrefixes, route...)
+			}
+		}
+	}
+
+	// completions and chat completions routes by default
+	if len(pathPrefixes) == 0 {
+		pathPrefixes = append(pathPrefixes, nameToRoutePathPrefix[modelChatCompletionsRoute]...)
+	}
+
+	var routesmatch []gatewayv1.HTTPRouteMatch
+	for _, path := range pathPrefixes {
+		routesmatch = append(routesmatch, gatewayv1.HTTPRouteMatch{
+			Path: &gatewayv1.HTTPPathMatch{
+				Type:  ptr.To(gatewayv1.PathMatchPathPrefix),
+				Value: ptr.To(path),
+			},
+			Headers: []gatewayv1.HTTPHeaderMatch{
+				modelHeaderMatch,
+			},
+		})
+	}
+	return routesmatch
+}
+
 type ModelRouter struct {
 	client.Client
 	Scheme *runtime.Scheme
@@ -192,6 +233,8 @@ func (m *ModelRouter) createHTTPRoute(namespace string, labels map[string]string
 		Value: modelName,
 	}
 
+	httpRoutesMatch := getSupportedRoutesMatchFromLabelsOrDefault(labels, modelHeaderMatch)
+
 	httpRoute := gatewayv1.HTTPRoute{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      fmt.Sprintf("%s-router", modelName),
@@ -208,35 +251,7 @@ func (m *ModelRouter) createHTTPRoute(namespace string, labels map[string]string
 		},
 		Rules: []gatewayv1.HTTPRouteRule{
 			{
-				Matches: []gatewayv1.HTTPRouteMatch{
-					{
-						Path: &gatewayv1.HTTPPathMatch{
-							Type:  ptr.To(gatewayv1.PathMatchPathPrefix),
-							Value: ptr.To("/v1/completions"),
-						},
-						Headers: []gatewayv1.HTTPHeaderMatch{
-							modelHeaderMatch,
-						},
-					},
-					{
-						Path: &gatewayv1.HTTPPathMatch{
-							Type:  ptr.To(gatewayv1.PathMatchPathPrefix),
-							Value: ptr.To("/v1/chat/completions"),
-						},
-						Headers: []gatewayv1.HTTPHeaderMatch{
-							modelHeaderMatch,
-						},
-					},
-					{
-						Path: &gatewayv1.HTTPPathMatch{
-							Type:  ptr.To(gatewayv1.PathMatchPathPrefix),
-							Value: ptr.To("/v1/embeddings"),
-						},
-						Headers: []gatewayv1.HTTPHeaderMatch{
-							modelHeaderMatch,
-						},
-					},
-				},
+				Matches: httpRoutesMatch,
 				BackendRefs: []gatewayv1.HTTPBackendRef{
 					{
 						BackendRef: gatewayv1.BackendRef{
From 49978b4bad342e8491c55885556a13cd6247dc6f Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Thu, 19 Jun 2025 11:05:32 +0200
Subject: [PATCH 04/10] feat: document max embeddings input array size

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/util.go | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pkg/plugins/gateway/util.go b/pkg/plugins/gateway/util.go
index 1e470a0d6..183a2abb1 100644
--- a/pkg/plugins/gateway/util.go
+++ b/pkg/plugins/gateway/util.go
@@ -29,6 +29,10 @@ import (
 	"k8s.io/klog/v2"
 )
 
+// OpenAI has a 2048 size limits for embeddings array inputs
+// see https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-input
+var maxEmbeddingInputArraySize = 2048
+
 // validateRequestBody validates input by unmarshaling request body into respective openai-golang struct based on requestpath.
 // nolint:nakedret
 func validateRequestBody(requestID, requestPath string, requestBody []byte, user utils.User) (model, message string, stream bool, errRes *extProcPb.ProcessingResponse) {
@@ -165,15 +169,18 @@ func getChatCompletionsMessage(requestID string, chatCompletionObj openai.ChatCo
 func checkEmbeddingInputSequenceLen(requestID string, embeddingObj openai.EmbeddingNewParams) *extProcPb.ProcessingResponse {
 	inputParam := embeddingObj.Input
 	var size int
+	isArrayType := false
 	switch input := embeddingNewParamsInputUnionAsAny(&inputParam).(type) {
 	case *string:
 		size = len(*input)
 	case *[]string:
 		size = len(*input)
+		isArrayType = true
 	case *[]int64:
 		size = len(*input)
 	case *[][]int64:
 		size = len(*input)
+		isArrayType = true
 	default:
 	}
 
@@ -181,7 +188,8 @@ func checkEmbeddingInputSequenceLen(requestID string, embeddingObj openai.Embedd
 		klog.ErrorS(nil, "no input in the request body", "requestID", requestID)
 		return buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "no messages in the request body", HeaderErrorRequestBodyProcessing, "true")
 	}
-	if size > 1024 {
+
+	if isArrayType && size > maxEmbeddingInputArraySize {
 		klog.ErrorS(nil, "embeddings content is too large", "requestID", requestID, "size", size)
 		return buildErrorResponse(envoyTypePb.StatusCode_BadRequest, "embeddings content is too large", HeaderErrorRequestBodyProcessing, "true")
 	}
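
The net effect of this patch: the 2048 cap applies only to array inputs ([]string and [][]int64), while a single string or a single tokenized []int64 input is rejected only when empty, never for its length. A hypothetical in-package test, not included in the series (it assumes util_test.go's existing imports plus encoding/json and strings), could pin that behavior down:

func Test_CheckEmbeddingInputSequenceLen(t *testing.T) {
	atLimit, _ := json.Marshal(make([]string, maxEmbeddingInputArraySize))
	overLimit, _ := json.Marshal(make([]string, maxEmbeddingInputArraySize+1))

	testCases := []struct {
		message   string
		body      string
		expectErr bool
	}{
		// a long single string is not capped, only arrays are
		{"long single string", `{"model": "m", "input": "` + strings.Repeat("a", 10000) + `"}`, false},
		{"array at the 2048 limit", `{"model": "m", "input": ` + string(atLimit) + `}`, false},
		{"array above the 2048 limit", `{"model": "m", "input": ` + string(overLimit) + `}`, true},
		{"empty string input", `{"model": "m", "input": ""}`, true},
	}

	for _, tt := range testCases {
		var obj openai.EmbeddingNewParams
		if err := json.Unmarshal([]byte(tt.body), &obj); err != nil {
			t.Fatalf("%s: unexpected unmarshal error: %v", tt.message, err)
		}
		if errRes := checkEmbeddingInputSequenceLen("test-request", obj); (errRes != nil) != tt.expectErr {
			t.Errorf("%s: got errRes=%v, expectErr=%v", tt.message, errRes, tt.expectErr)
		}
	}
}
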
From cecb3827b14836bc1773f86d90680c68ae03010a Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Thu, 19 Jun 2025 11:12:51 +0200
Subject: [PATCH 05/10] feat: explicitly log unknown embeddings types

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/util.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/plugins/gateway/util.go b/pkg/plugins/gateway/util.go
index 183a2abb1..01edbebf6 100644
--- a/pkg/plugins/gateway/util.go
+++ b/pkg/plugins/gateway/util.go
@@ -18,6 +18,7 @@ package gateway
 
 import (
 	"encoding/json"
+	"fmt"
 	"strings"
 
 	configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
@@ -182,6 +183,11 @@ func checkEmbeddingInputSequenceLen(requestID string, embeddingObj openai.Embedd
 		size = len(*input)
 		isArrayType = true
 	default:
+		// Should never happend, but if input is of an unexpected non-nil type, let's explicitly error log it.
+		// Size will be 0 in this case, which is then handled by the check below.
+		if input != nil {
+			klog.ErrorS(nil, "unhandled embedding input type", "requestID", requestID, "inputType", fmt.Sprintf("%T", input))
+		}
 	}
 
 	if size == 0 {
From c165cc055c206ee0caecfae543f107b09604e0ef Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Thu, 19 Jun 2025 11:29:56 +0200
Subject: [PATCH 06/10] feat: explicitly set default route option

Signed-off-by: Guillaume Calmettes
---
 .../modelrouter/modelrouter_controller.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index 59818a378..dd0f1ab4c 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -47,8 +47,9 @@ const (
 	modelIdentifier                = "model.aibrix.ai/name"
 	modelPortIdentifier            = "model.aibrix.ai/port"
 	modelSupportedRoutesIdentifier = "model.aibrix.ai/supported-routes"
-	modelEmbeddingsRoute           = "embeddings"
-	modelChatCompletionsRoute      = "chat-completions"
+	modelRouteEmbeddings           = "embeddings"
+	modelRouteChatCompletions      = "chat-completions"
+	modelRouteDefault              = modelRouteChatCompletions
 	// TODO (varun): parameterize it or dynamically resolve it
 	aibrixEnvoyGateway          = "aibrix-eg"
 	aibrixEnvoyGatewayNamespace = "aibrix-system"
@@ -112,25 +113,26 @@ func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error {
 	return err
 }
 
+// getSupportedRoutesMatchFromLabelsOrDefault returns the HTTPRouteMatch based on the model route labels value
 func getSupportedRoutesMatchFromLabelsOrDefault(labels map[string]string, modelHeaderMatch gatewayv1.HTTPHeaderMatch) []gatewayv1.HTTPRouteMatch {
-	nameToRoutePathPrefix := map[string][]string{
-		modelEmbeddingsRoute:      {"/v1/embeddings"},
-		modelChatCompletionsRoute: {"/v1/completions", "/v1/chat/completions"},
+	labelValueToRoutePathPrefix := map[string][]string{
+		modelRouteEmbeddings:      {"/v1/embeddings"},
+		modelRouteChatCompletions: {"/v1/completions", "/v1/chat/completions"},
 	}
 
 	var pathPrefixes []string
 	if routesLabelValue, ok := labels[modelSupportedRoutesIdentifier]; ok {
 		routes := strings.Split(routesLabelValue, ",")
-		for k, route := range nameToRoutePathPrefix {
+		for k, route := range labelValueToRoutePathPrefix {
 			if slices.Contains(routes, k) {
 				pathPrefixes = append(pathPrefixes, route...)
 			}
 		}
 	}
 
-	// completions and chat completions routes by default
+	// Add the default pathPrefixes if no route defines via labels
 	if len(pathPrefixes) == 0 {
-		pathPrefixes = append(pathPrefixes, nameToRoutePathPrefix[modelChatCompletionsRoute]...)
+		pathPrefixes = append(pathPrefixes, labelValueToRoutePathPrefix[modelRouteDefault]...)
 	}
 
 	var routesmatch []gatewayv1.HTTPRouteMatch
From fadfadf0c4ea1142020b6a43d3f083ffa0044635 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 20 Jun 2025 09:14:31 +0200
Subject: [PATCH 07/10] feat: introduce openai-specific types for request type matching

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/openai_utils.go | 46 +++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 pkg/plugins/gateway/openai_utils.go

diff --git a/pkg/plugins/gateway/openai_utils.go b/pkg/plugins/gateway/openai_utils.go
new file mode 100644
index 000000000..735e7d64d
--- /dev/null
+++ b/pkg/plugins/gateway/openai_utils.go
@@ -0,0 +1,46 @@
+/*
+Copyright 2024 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package gateway
+
+type (
+	OpenAiRequestType string
+	OpenAiRequestPath string
+)
+
+var (
+	OpenAiRequestChatCompletionsType OpenAiRequestType = "chat-completions"
+	OpenAiRequestCompletionsType     OpenAiRequestType = "completions"
+	OpenAiRequestEmbeddingsType      OpenAiRequestType = "embeddings"
+	OpenAiRequestUnknownType         OpenAiRequestType = "unknown"
+	OpenAiRequestChatCompletionsPath OpenAiRequestPath = "/v1/chat/completions"
+	OpenAiRequestCompletionsPath     OpenAiRequestPath = "/v1/completions"
+	OpenAiRequestEmbeddingsPath      OpenAiRequestPath = "/v1/embeddings"
+	OpenAiRequestUnknownPath         OpenAiRequestPath = ""
+)
+
+func NewOpenAiRequestTypeFromPath(path string) OpenAiRequestType {
+	requestType := OpenAiRequestUnknownType
+	switch path {
+	case string(OpenAiRequestCompletionsPath):
+		requestType = OpenAiRequestCompletionsType
+	case string(OpenAiRequestChatCompletionsPath):
+		requestType = OpenAiRequestChatCompletionsType
+	case string(OpenAiRequestEmbeddingsPath):
+		requestType = OpenAiRequestEmbeddingsType
+	}
+	return requestType
+}
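
A small in-package test sketch (hypothetical, not part of the series) makes the path-to-type mapping and its unknown fallback explicit:

func Test_NewOpenAiRequestTypeFromPath(t *testing.T) {
	cases := map[string]OpenAiRequestType{
		"/v1/chat/completions": OpenAiRequestChatCompletionsType,
		"/v1/completions":      OpenAiRequestCompletionsType,
		"/v1/embeddings":       OpenAiRequestEmbeddingsType,
		"/v1/audio/speech":     OpenAiRequestUnknownType, // unmapped paths fall back to unknown
		"":                     OpenAiRequestUnknownType,
	}
	for path, want := range cases {
		if got := NewOpenAiRequestTypeFromPath(path); got != want {
			t.Errorf("NewOpenAiRequestTypeFromPath(%q) = %q, want %q", path, got, want)
		}
	}
}
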
From 61f1a18e4bf70d7eb92886a4eeed357d7245b4b8 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 20 Jun 2025 09:15:21 +0200
Subject: [PATCH 08/10] feat: switch validation of request body based on request type

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/gateway_req_body.go | 26 ++++++++++++--------
 pkg/plugins/gateway/util.go             | 18 ++++++++------
 pkg/plugins/gateway/util_test.go        | 32 ++++++++++++-------------
 3 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/pkg/plugins/gateway/gateway_req_body.go b/pkg/plugins/gateway/gateway_req_body.go
index d0bacb10c..552f59bce 100644
--- a/pkg/plugins/gateway/gateway_req_body.go
+++ b/pkg/plugins/gateway/gateway_req_body.go
@@ -31,14 +31,17 @@ import (
 )
 
 func (s *Server) HandleRequestBody(ctx context.Context, requestID string, requestPath string, req *extProcPb.ProcessingRequest,
-	user utils.User, routingAlgorithm types.RoutingAlgorithm) (*extProcPb.ProcessingResponse, string, *types.RoutingContext, bool, int64) {
+	user utils.User, routingAlgorithm types.RoutingAlgorithm,
+) (*extProcPb.ProcessingResponse, string, *types.RoutingContext, bool, OpenAiRequestType, int64) {
 	var routingCtx *types.RoutingContext
 	var term int64 // Identify the trace window
 
+	requestType := NewOpenAiRequestTypeFromPath(requestPath)
+
 	body := req.Request.(*extProcPb.ProcessingRequest_RequestBody)
-	model, message, stream, errRes := validateRequestBody(requestID, requestPath, body.RequestBody.GetBody(), user)
+	model, message, stream, errRes := validateRequestBody(requestID, requestType, body.RequestBody.GetBody(), user)
 	if errRes != nil {
-		return errRes, model, routingCtx, stream, term
+		return errRes, model, routingCtx, stream, requestType, term
 	}
 
 	// early reject the request if model doesn't exist.
@@ -46,8 +49,9 @@ func (s *Server) HandleRequestBody(ctx context.Context, requestID string, reques
 		klog.ErrorS(nil, "model doesn't exist in cache, probably wrong model name", "requestID", requestID, "model", model)
 		return generateErrorResponse(envoyTypePb.StatusCode_BadRequest,
 			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
-				Key: HeaderErrorNoModelBackends, RawValue: []byte(model)}}},
-			fmt.Sprintf("model %s does not exist", model)), model, routingCtx, stream, term
+				Key: HeaderErrorNoModelBackends, RawValue: []byte(model),
+			}}},
+			fmt.Sprintf("model %s does not exist", model)), model, routingCtx, stream, requestType, term
 	}
 
 	// early reject if no pods are ready to accept request for a model
@@ -56,8 +60,9 @@ func (s *Server) HandleRequestBody(ctx context.Context, requestID string, reques
 		klog.ErrorS(err, "no ready pod available", "requestID", requestID, "model", model)
 		return generateErrorResponse(envoyTypePb.StatusCode_ServiceUnavailable,
 			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
-				Key: HeaderErrorNoModelBackends, RawValue: []byte("true")}}},
-			fmt.Sprintf("error on getting pods for model %s", model)), model, routingCtx, stream, term
+				Key: HeaderErrorNoModelBackends, RawValue: []byte("true"),
+			}}},
+			fmt.Sprintf("error on getting pods for model %s", model)), model, routingCtx, stream, requestType, term
 	}
 
 	routingCtx = types.NewRoutingContext(ctx, routingAlgorithm, model, message, requestID, user.Name)
@@ -72,8 +77,9 @@ func (s *Server) HandleRequestBody(ctx context.Context, requestID string, reques
 		return generateErrorResponse(
 			envoyTypePb.StatusCode_ServiceUnavailable,
 			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
-				Key: HeaderErrorRouting, RawValue: []byte("true")}}},
-			"error on selecting target pod"), model, routingCtx, stream, term
+				Key: HeaderErrorRouting, RawValue: []byte("true"),
+			}}},
+			"error on selecting target pod"), model, routingCtx, stream, requestType, term
 	}
 	headers = buildEnvoyProxyHeaders(headers,
 		HeaderRoutingStrategy, string(routingAlgorithm),
@@ -93,5 +99,5 @@ func (s *Server) HandleRequestBody(ctx context.Context, requestID string, reques
 				},
 			},
 		},
-	}, model, routingCtx, stream, term
+	}, model, routingCtx, stream, requestType, term
 }
diff --git a/pkg/plugins/gateway/util.go b/pkg/plugins/gateway/util.go
index 01edbebf6..54f2302a3 100644
--- a/pkg/plugins/gateway/util.go
+++ b/pkg/plugins/gateway/util.go
@@ -34,11 +34,13 @@ import (
 // see https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-input
 var maxEmbeddingInputArraySize = 2048
 
-// validateRequestBody validates input by unmarshaling request body into respective openai-golang struct based on requestpath.
+// validateRequestBody validates input by unmarshaling request body into respective openai-golang struct based on requestType.
 // nolint:nakedret
-func validateRequestBody(requestID, requestPath string, requestBody []byte, user utils.User) (model, message string, stream bool, errRes *extProcPb.ProcessingResponse) {
+func validateRequestBody(requestID string, requestType OpenAiRequestType, requestBody []byte, user utils.User) (model, message string, stream bool, errRes *extProcPb.ProcessingResponse) {
 	var streamOptions openai.ChatCompletionStreamOptionsParam
-	if requestPath == "/v1/chat/completions" {
+	switch requestType {
+
+	case OpenAiRequestChatCompletionsType:
 		var jsonMap map[string]json.RawMessage
 		if err := json.Unmarshal(requestBody, &jsonMap); err != nil {
 			klog.ErrorS(err, "error to unmarshal request body", "requestID", requestID, "requestBody", string(requestBody))
@@ -59,7 +61,8 @@ func validateRequestBody(requestID, requestPath string, requestBody []byte, user
 		if errRes = validateStreamOptions(requestID, user, &stream, streamOptions, jsonMap); errRes != nil {
 			return
 		}
-	} else if requestPath == "/v1/completions" {
+
+	case OpenAiRequestCompletionsType:
 		// openai.CompletionsNewParams does not support json unmarshal for CompletionNewParamsPromptUnion in release v0.1.0-beta.10
 		// once supported, input request will be directly unmarshal into openai.CompletionsNewParams
 		type Completion struct {
@@ -75,7 +78,8 @@ func validateRequestBody(requestID, requestPath string, requestBody []byte, user
 		}
 		model = completionObj.Model
 		message = completionObj.Prompt
-	} else if requestPath == "/v1/embeddings" {
+
+	case OpenAiRequestEmbeddingsType:
 		message = "" // prefix_cache algorithms are not relevant for embeddings
 		var jsonMap map[string]json.RawMessage
 		if err := json.Unmarshal(requestBody, &jsonMap); err != nil {
@@ -93,12 +97,12 @@ func validateRequestBody(requestID, requestPath string, requestBody []byte, user
 		if errRes = checkEmbeddingInputSequenceLen(requestID, embeddingObj); errRes != nil {
 			return
 		}
-	} else {
+	case OpenAiRequestUnknownType:
 		errRes = buildErrorResponse(envoyTypePb.StatusCode_NotImplemented, "unknown request path", HeaderErrorRequestBodyProcessing, "true")
 		return
 	}
 
-	klog.V(4).InfoS("validateRequestBody", "requestID", requestID, "requestPath", requestPath, "model", model, "message", message, "stream", stream, "streamOptions", streamOptions)
+	klog.V(4).InfoS("validateRequestBody", "requestID", requestID, "requestType", requestType, "model", model, "message", message, "stream", stream, "streamOptions", streamOptions)
 	return
 }
diff --git a/pkg/plugins/gateway/util_test.go b/pkg/plugins/gateway/util_test.go
index dacb89215..4570b5122 100644
--- a/pkg/plugins/gateway/util_test.go
+++ b/pkg/plugins/gateway/util_test.go
@@ -28,7 +28,7 @@ import (
 func Test_ValidateRequestBody(t *testing.T) {
 	testCases := []struct {
 		message     string
-		requestPath string
+		requestType OpenAiRequestType
 		requestBody []byte
 		model       string
 		messages    string
@@ -38,30 +38,30 @@ func Test_ValidateRequestBody(t *testing.T) {
 	}{
 		{
 			message:     "unknown path",
-			requestPath: "/v1/unknown",
+			requestType: OpenAiRequestUnknownType,
 			statusCode:  envoyTypePb.StatusCode_NotImplemented,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal error",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte("bad_request"),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal ChatCompletionsNewParams",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": 1}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal no messages",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b"}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal valid messages",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": "say this is test"}]}`),
 			model:       "llama2-7b",
 			messages:    "this is system say this is test",
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal invalid messages with complex content",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": {"type": "text", "text": "say this is test", "complex": make(chan int)}}]}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal valid messages with complex content",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": [{"type": "text", "text": "say this is test"}, {"type": "text", "text": "say this is test"}]}]}`),
 			model:       "llama2-7b",
 			messages:    "this is system [{\"text\":\"say this is test\",\"type\":\"text\"},{\"text\":\"say this is test\",\"type\":\"text\"}]",
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal valid messages with stop string param",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": "say this is test"}], "stop": "stop"}`),
 			model:       "llama2-7b",
 			messages:    "this is system say this is test",
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 		{
 			message:     "/v1/chat/completions json unmarhsal valid messages with stop array param",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": "say this is test"}], "stop": ["stop"]}`),
 			model:       "llama2-7b",
 			messages:    "this is system say this is test",
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 		{
 			message:     "/v1/chat/completions json unmarshal invalid stream bool",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "stream": "true", "messages": [{"role": "system", "content": "this is system"}]}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions json unmarshal stream options is null",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			user:        utils.User{Tpm: 1},
 			requestBody: []byte(`{"model": "llama2-7b", "stream": true, "messages": [{"role": "system", "content": "this is system"}]}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions stream_options.include_usage == false with user.TPM >= 1 is NOT OK",
 			user:        utils.User{Tpm: 1},
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "stream": true, "stream_options": {"include_usage": false}, "messages": [{"role": "system", "content": "this is system"}]}`),
 			statusCode:  envoyTypePb.StatusCode_BadRequest,
 		},
 		{
 			message:     "/v1/chat/completions stream_options.include_usage == false with user.TPM == 0 is OK",
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "stream": true, "stream_options": {"include_usage": false}, "messages": [{"role": "system", "content": "this is system"}]}`),
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 		{
 			message:     "/v1/chat/completions valid request body",
 			user:        utils.User{Tpm: 1},
-			requestPath: "/v1/chat/completions",
+			requestType: OpenAiRequestChatCompletionsType,
 			requestBody: []byte(`{"model": "llama2-7b", "stream": true, "stream_options": {"include_usage": true}, "messages": [{"role": "system", "content": "this is system"},{"role": "user", "content": "say this is test"}]}`),
 			stream:      true,
 			model:       "llama2-7b",
 			messages:    "this is system say this is test",
 			statusCode:  envoyTypePb.StatusCode_OK,
 		},
 	}
 
 	for _, tt := range testCases {
-		model, messages, stream, errRes := validateRequestBody("1", tt.requestPath, tt.requestBody, tt.user)
+		model, messages, stream, errRes := validateRequestBody("1", tt.requestType, tt.requestBody, tt.user)
 
 		if tt.statusCode == 200 {
 			assert.Equal(t, (*extProcPb.ProcessingResponse)(nil), errRes, tt.message)
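
The test table still exercises only chat-completions bodies; embeddings coverage could be added later with entries along these lines (the model name is a placeholder):

		{
			message:     "/v1/embeddings valid single string input",
			requestType: OpenAiRequestEmbeddingsType,
			requestBody: []byte(`{"model": "text-embedding-model", "input": "say this is test"}`),
			model:       "text-embedding-model",
			statusCode:  envoyTypePb.StatusCode_OK,
		},
		{
			message:     "/v1/embeddings missing input is rejected",
			requestType: OpenAiRequestEmbeddingsType,
			requestBody: []byte(`{"model": "text-embedding-model"}`),
			statusCode:  envoyTypePb.StatusCode_BadRequest,
		},
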
From 2c866455b03a5ea2b4a60f2fea446eafdec95f95 Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 20 Jun 2025 09:22:43 +0200
Subject: [PATCH 09/10] feat: process response body differently based on request type

Signed-off-by: Guillaume Calmettes
---
 pkg/plugins/gateway/gateway.go          |   8 +-
 pkg/plugins/gateway/gateway_rsp_body.go | 159 +++++++++++++++++++++++-
 2 files changed, 163 insertions(+), 4 deletions(-)

diff --git a/pkg/plugins/gateway/gateway.go b/pkg/plugins/gateway/gateway.go
index 608ebc36a..3f322a2e2 100644
--- a/pkg/plugins/gateway/gateway.go
+++ b/pkg/plugins/gateway/gateway.go
@@ -82,6 +82,7 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
 	var respErrorCode int
 	var model string
 	var requestPath string
+	var requestType OpenAiRequestType
 	var routingAlgorithm types.RoutingAlgorithm
 	var routerCtx *types.RoutingContext
 	var stream, isRespError bool
@@ -113,7 +114,7 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
 			resp, user, rpm, routingAlgorithm, requestPath = s.HandleRequestHeaders(ctx, requestID, req)
 
 		case *extProcPb.ProcessingRequest_RequestBody:
-			resp, model, routerCtx, stream, traceTerm = s.HandleRequestBody(ctx, requestID, requestPath, req, user, routingAlgorithm)
+			resp, model, routerCtx, stream, requestType, traceTerm = s.HandleRequestBody(ctx, requestID, requestPath, req, user, routingAlgorithm)
 			if routerCtx != nil {
 				ctx = routerCtx
 			}
@@ -135,7 +136,7 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
 				resp = s.responseErrorProcessing(ctx, resp, respErrorCode, model, requestID,
 					string(req.Request.(*extProcPb.ProcessingRequest_ResponseBody).ResponseBody.GetBody()))
 			} else {
-				resp, completed = s.HandleResponseBody(ctx, requestID, req, user, rpm, model, stream, traceTerm, completed)
+				resp, completed = s.HandleResponseBody(ctx, requestID, req, requestType, user, rpm, model, stream, traceTerm, completed)
 			}
 		default:
 			klog.Infof("Unknown Request type %+v\n", v)
@@ -205,7 +206,8 @@ func (s *Server) validateHTTPRouteStatus(ctx context.Context, model string) erro
 }
 
 func (s *Server) responseErrorProcessing(ctx context.Context, resp *extProcPb.ProcessingResponse, respErrorCode int,
-	model, requestID, errMsg string) *extProcPb.ProcessingResponse {
+	model, requestID, errMsg string,
+) *extProcPb.ProcessingResponse {
 	httprouteErr := s.validateHTTPRouteStatus(ctx, model)
 	if errMsg != "" && httprouteErr != nil {
 		errMsg = fmt.Sprintf("%s. %s", errMsg, httprouteErr.Error())
diff --git a/pkg/plugins/gateway/gateway_rsp_body.go b/pkg/plugins/gateway/gateway_rsp_body.go
index d8bcf4634..6eeb70105 100644
--- a/pkg/plugins/gateway/gateway_rsp_body.go
+++ b/pkg/plugins/gateway/gateway_rsp_body.go
@@ -35,9 +35,26 @@ import (
 	"github.com/vllm-project/aibrix/pkg/utils"
 )
 
-func (s *Server) HandleResponseBody(ctx context.Context, requestID string, req *extProcPb.ProcessingRequest, user utils.User, rpm int64, model string, stream bool, traceTerm int64, hasCompleted bool) (*extProcPb.ProcessingResponse, bool) {
+func (s *Server) HandleResponseBody(ctx context.Context, requestID string, req *extProcPb.ProcessingRequest, requestType OpenAiRequestType, user utils.User, rpm int64, model string, stream bool, traceTerm int64, hasCompleted bool) (*extProcPb.ProcessingResponse, bool) {
 	b := req.Request.(*extProcPb.ProcessingRequest_ResponseBody)
 
+	switch requestType {
+	case OpenAiRequestChatCompletionsType, OpenAiRequestCompletionsType:
+		return s.handleChatCompletionsResponseBody(ctx, requestID, b, user, rpm, model, stream, traceTerm, hasCompleted)
+	case OpenAiRequestEmbeddingsType:
+		return s.handleEmbeddingsResponseBody(ctx, requestID, b, user, rpm, model, false, traceTerm, hasCompleted)
+	default:
+		// all other openAi request types (e.g. audio, image, ..) are not supported yet
+		return generateErrorResponse(
+			envoyTypePb.StatusCode_NotImplemented,
+			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
+				Key: HeaderErrorResponseUnknown, RawValue: []byte("true"),
+			}}},
+			"request type not supported"), true
+	}
+}
+
+func (s *Server) handleChatCompletionsResponseBody(ctx context.Context, requestID string, b *extProcPb.ProcessingRequest_ResponseBody, user utils.User, rpm int64, model string, stream bool, traceTerm int64, hasCompleted bool) (*extProcPb.ProcessingResponse, bool) {
 	var res openai.ChatCompletion
 	var usage openai.CompletionUsage
 	var promptTokens, completionTokens int64
@@ -203,3 +220,143 @@ func (s *Server) HandleResponseBody(ctx context.Context, requestID string, req *
 		},
 	}, complete
 }
+
+func (s *Server) handleEmbeddingsResponseBody(ctx context.Context, requestID string, b *extProcPb.ProcessingRequest_ResponseBody, user utils.User, rpm int64, model string, stream bool, traceTerm int64, hasCompleted bool) (*extProcPb.ProcessingResponse, bool) {
+	var res openai.CreateEmbeddingResponse
+	var usage openai.CreateEmbeddingResponseUsage
+	var promptTokens, completionTokens int64
+	var headers []*configPb.HeaderValueOption
+	complete := hasCompleted
+	routerCtx, _ := ctx.(*types.RoutingContext)
+
+	defer func() {
+		// Wrapped in a function to delay the evaluation of parameters. Using complete to make sure DoneRequestTrace only call once for a request.
+		if !hasCompleted && complete {
+			s.cache.DoneRequestTrace(routerCtx, requestID, model, promptTokens, completionTokens, traceTerm)
+			if routerCtx != nil {
+				routerCtx.Delete()
+			}
+		}
+	}()
+
+	// Use request ID as a key to store per-request buffer
+	// Retrieve or create buffer
+	buf, _ := requestBuffers.LoadOrStore(requestID, &bytes.Buffer{})
+	buffer := buf.(*bytes.Buffer)
+	// Append data to per-request buffer
+	buffer.Write(b.ResponseBody.Body)
+
+	if !b.ResponseBody.EndOfStream {
+		// Partial data received, wait for more chunks, we just return a common response here.
+		return &extProcPb.ProcessingResponse{
+			Response: &extProcPb.ProcessingResponse_ResponseBody{
+				ResponseBody: &extProcPb.BodyResponse{
+					Response: &extProcPb.CommonResponse{},
+				},
+			},
+		}, complete
+	}
+
+	// Last part received, process the full response
+	finalBody := buffer.Bytes()
+	// Clean up the buffer after final processing
+	requestBuffers.Delete(requestID)
+
+	if err := json.Unmarshal(finalBody, &res); err != nil {
+		klog.ErrorS(err, "error to unmarshal response", "requestID", requestID, "responseBody", string(b.ResponseBody.GetBody()))
+		complete = true
+		return generateErrorResponse(
+			envoyTypePb.StatusCode_InternalServerError,
+			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
+				Key: HeaderErrorResponseUnmarshal, RawValue: []byte("true"),
+			}}},
+			err.Error()), complete
+	} else if len(res.Model) == 0 {
+		msg := ErrorUnknownResponse.Error()
+		responseBodyContent := string(b.ResponseBody.GetBody())
+		if len(responseBodyContent) != 0 {
+			msg = responseBodyContent
+		}
+		klog.ErrorS(err, "unexpected response", "requestID", requestID, "responseBody", responseBodyContent)
+		complete = true
+		return generateErrorResponse(
+			envoyTypePb.StatusCode_InternalServerError,
+			[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
+				Key: HeaderErrorResponseUnknown, RawValue: []byte("true"),
+			}}},
+			msg), complete
+	}
+	// Do not overwrite model, res can be empty.
+	usage = res.Usage
+
+	var requestEnd string
+	if usage.TotalTokens != 0 {
+		complete = true
+		// Update promptTokens and completeTokens
+		promptTokens = usage.PromptTokens
+		completionTokens = 0 // no completion tokens in embeddings request
+		// Count token per user.
+		if user.Name != "" {
+			tpm, err := s.ratelimiter.Incr(ctx, fmt.Sprintf("%v_TPM_CURRENT", user.Name), res.Usage.TotalTokens)
+			if err != nil {
+				return generateErrorResponse(
+					envoyTypePb.StatusCode_InternalServerError,
+					[]*configPb.HeaderValueOption{{Header: &configPb.HeaderValue{
+						Key: HeaderErrorIncrTPM, RawValue: []byte("true"),
+					}}},
+					err.Error()), complete
+			}
+
+			headers = append(headers,
+				&configPb.HeaderValueOption{
+					Header: &configPb.HeaderValue{
+						Key:      HeaderUpdateRPM,
+						RawValue: []byte(fmt.Sprintf("%d", rpm)),
+					},
+				},
+				&configPb.HeaderValueOption{
+					Header: &configPb.HeaderValue{
+						Key:      HeaderUpdateTPM,
+						RawValue: []byte(fmt.Sprintf("%d", tpm)),
+					},
+				},
+			)
+			requestEnd = fmt.Sprintf(requestEnd+"rpm: %d, tpm: %d, ", rpm, tpm)
+		}
+
+		if routerCtx != nil && routerCtx.HasRouted() {
+			targetPodIP := routerCtx.TargetAddress()
+			headers = append(headers,
+				&configPb.HeaderValueOption{
+					Header: &configPb.HeaderValue{
+						Key:      HeaderTargetPod,
+						RawValue: []byte(targetPodIP),
+					},
+				},
+				&configPb.HeaderValueOption{
+					Header: &configPb.HeaderValue{
+						Key:      HeaderRequestID,
+						RawValue: []byte(requestID),
+					},
+				},
+			)
+			requestEnd = fmt.Sprintf(requestEnd+"targetPod: %s", targetPodIP)
+		}
+
+		klog.Infof("request end, requestID: %s - %s", requestID, requestEnd)
+	} else if b.ResponseBody.EndOfStream {
+		complete = true
+	}
+
+	return &extProcPb.ProcessingResponse{
+		Response: &extProcPb.ProcessingResponse_ResponseBody{
+			ResponseBody: &extProcPb.BodyResponse{
+				Response: &extProcPb.CommonResponse{
+					HeaderMutation: &extProcPb.HeaderMutation{
+						SetHeaders: headers,
+					},
+				},
+			},
+		},
+	}, complete
+}
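
For embeddings the handler buffers the whole (non-streamed) body, then charges usage.prompt_tokens to the request trace and always reports zero completion tokens. A minimal sketch of the response shape it parses, using a hand-written body (real responses carry actual vectors; the model name is a placeholder):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/openai/openai-go"
)

func main() {
	// Minimal OpenAI-compatible embeddings response body.
	body := []byte(`{
		"object": "list",
		"data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3]}],
		"model": "some-embedding-model",
		"usage": {"prompt_tokens": 5, "total_tokens": 5}
	}`)

	var res openai.CreateEmbeddingResponse
	if err := json.Unmarshal(body, &res); err != nil {
		panic(err)
	}

	// handleEmbeddingsResponseBody uses usage.PromptTokens for the trace and
	// a fixed 0 for completion tokens.
	fmt.Printf("model=%s promptTokens=%d completionTokens=%d total=%d\n",
		res.Model, res.Usage.PromptTokens, 0, res.Usage.TotalTokens)
}
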
From 10b946b67e05d15a9ea4da27804b2e39ac7a053d Mon Sep 17 00:00:00 2001
From: Guillaume Calmettes
Date: Fri, 20 Jun 2025 09:23:12 +0200
Subject: [PATCH 10/10] feat: re-use defined variables from openai utils in modelrouter

Signed-off-by: Guillaume Calmettes
---
 .../modelrouter/modelrouter_controller.go     | 39 ++++++++++---------
 pkg/plugins/gateway/util.go                   |  2 +-
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/pkg/controller/modelrouter/modelrouter_controller.go b/pkg/controller/modelrouter/modelrouter_controller.go
index dd0f1ab4c..a57da2523 100644
--- a/pkg/controller/modelrouter/modelrouter_controller.go
+++ b/pkg/controller/modelrouter/modelrouter_controller.go
@@ -37,19 +37,17 @@ import (
 	modelv1alpha1 "github.com/vllm-project/aibrix/api/model/v1alpha1"
 	orchestrationv1alpha1 "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"
 	"github.com/vllm-project/aibrix/pkg/config"
+	aibrixgateway "github.com/vllm-project/aibrix/pkg/plugins/gateway"
 	gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
 	gatewayv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1"
 )
 
 const (
 	// TODO (varun): cleanup model related identifiers and establish common consensus
-	modelHeaderIdentifier          = "model"
-	modelIdentifier                = "model.aibrix.ai/name"
-	modelPortIdentifier            = "model.aibrix.ai/port"
-	modelSupportedRoutesIdentifier = "model.aibrix.ai/supported-routes"
-	modelRouteEmbeddings           = "embeddings"
-	modelRouteChatCompletions      = "chat-completions"
-	modelRouteDefault              = modelRouteChatCompletions
+	modelHeaderIdentifier               = "model"
+	modelIdentifier                     = "model.aibrix.ai/name"
+	modelPortIdentifier                 = "model.aibrix.ai/port"
+	modelSupportedRequestTypeIdentifier = "model.aibrix.ai/supported-request-types"
 	// TODO (varun): parameterize it or dynamically resolve it
 	aibrixEnvoyGateway          = "aibrix-eg"
 	aibrixEnvoyGatewayNamespace = "aibrix-system"
@@ -57,6 +55,16 @@ const (
 	defaultModelServingPort = 8000
 )
 
+var (
+	requestTypeIdentifierToSupportedRoutePathPrefix = map[string][]string{
+		string(aibrixgateway.OpenAiRequestEmbeddingsType):      {string(aibrixgateway.OpenAiRequestEmbeddingsPath)},
+		string(aibrixgateway.OpenAiRequestChatCompletionsType): {string(aibrixgateway.OpenAiRequestCompletionsPath), string(aibrixgateway.OpenAiRequestChatCompletionsPath)},
+		string(aibrixgateway.OpenAiRequestCompletionsType):     {string(aibrixgateway.OpenAiRequestCompletionsPath), string(aibrixgateway.OpenAiRequestChatCompletionsPath)},
+	}
+
+	defaultSupportedRequestType = string(aibrixgateway.OpenAiRequestChatCompletionsType)
+)
+
 //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=orchestration.aibrix.ai,resources=rayclusterfleets,verbs=get;list;watch;create;update;patch;delete
 //+kubebuilder:rbac:groups=gateway.networking.k8s.io,resources=httproutes,verbs=get;list;watch;create;update;patch;delete
@@ -115,24 +123,19 @@ func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error {
 
 // getSupportedRoutesMatchFromLabelsOrDefault returns the HTTPRouteMatch based on the model route labels value
 func getSupportedRoutesMatchFromLabelsOrDefault(labels map[string]string, modelHeaderMatch gatewayv1.HTTPHeaderMatch) []gatewayv1.HTTPRouteMatch {
-	labelValueToRoutePathPrefix := map[string][]string{
-		modelRouteEmbeddings:      {"/v1/embeddings"},
-		modelRouteChatCompletions: {"/v1/completions", "/v1/chat/completions"},
-	}
-
 	var pathPrefixes []string
-	if routesLabelValue, ok := labels[modelSupportedRoutesIdentifier]; ok {
-		routes := strings.Split(routesLabelValue, ",")
-		for k, route := range labelValueToRoutePathPrefix {
-			if slices.Contains(routes, k) {
-				pathPrefixes = append(pathPrefixes, route...)
+	if routesLabelValue, ok := labels[modelSupportedRequestTypeIdentifier]; ok {
+		routesIdentifier := strings.Split(routesLabelValue, ",")
+		for id, paths := range requestTypeIdentifierToSupportedRoutePathPrefix {
+			if slices.Contains(routesIdentifier, id) {
+				pathPrefixes = append(pathPrefixes, paths...)
 			}
 		}
 	}
 
 	// Add the default pathPrefixes if no route defines via labels
 	if len(pathPrefixes) == 0 {
-		pathPrefixes = append(pathPrefixes, labelValueToRoutePathPrefix[modelRouteDefault]...)
+		pathPrefixes = append(pathPrefixes, requestTypeIdentifierToSupportedRoutePathPrefix[defaultSupportedRequestType]...)
 	}
 
 	var routesmatch []gatewayv1.HTTPRouteMatch

diff --git a/pkg/plugins/gateway/util.go b/pkg/plugins/gateway/util.go
index 54f2302a3..28770537f 100644
--- a/pkg/plugins/gateway/util.go
+++ b/pkg/plugins/gateway/util.go
@@ -187,7 +187,7 @@ func checkEmbeddingInputSequenceLen(requestID string, embeddingObj openai.Embedd
 		size = len(*input)
 		isArrayType = true
 	default:
-		// Should never happend, but if input is of an unexpected non-nil type, let's explicitly error log it.
+		// Should never happen, but if input is of an unexpected non-nil type, let's explicitly error log it.
 		// Size will be 0 in this case, which is then handled by the check below.
 		if input != nil {
 			klog.ErrorS(nil, "unhandled embedding input type", "requestID", requestID, "inputType", fmt.Sprintf("%T", input))
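
Taken together, the series lets a model opt into routes through the model.aibrix.ai/supported-request-types label on its Deployment, while unlabeled models keep the default completions/chat-completions routes. A standalone sketch of that resolution logic, with the controller's constants inlined for illustration (the real implementation lives in modelrouter_controller.go; the model names below are hypothetical):

package main

import (
	"fmt"
	"slices"
	"strings"
)

// Inlined copy of requestTypeIdentifierToSupportedRoutePathPrefix from patch 10.
var requestTypeToPaths = map[string][]string{
	"embeddings":       {"/v1/embeddings"},
	"chat-completions": {"/v1/completions", "/v1/chat/completions"},
	"completions":      {"/v1/completions", "/v1/chat/completions"},
}

// resolvePathPrefixes mirrors getSupportedRoutesMatchFromLabelsOrDefault up to
// the point where HTTPRouteMatch objects are built. Like the real code, it
// iterates a map, so prefix ordering across several request types is not
// deterministic.
func resolvePathPrefixes(labels map[string]string) []string {
	var prefixes []string
	if value, ok := labels["model.aibrix.ai/supported-request-types"]; ok {
		requested := strings.Split(value, ",")
		for id, paths := range requestTypeToPaths {
			if slices.Contains(requested, id) {
				prefixes = append(prefixes, paths...)
			}
		}
	}
	// Fall back to the chat-completions routes when the label is absent or
	// carries no recognized request type.
	if len(prefixes) == 0 {
		prefixes = requestTypeToPaths["chat-completions"]
	}
	return prefixes
}

func main() {
	// An embeddings-only model: only /v1/embeddings is matched.
	fmt.Println(resolvePathPrefixes(map[string]string{
		"model.aibrix.ai/name":                    "some-embedding-model",
		"model.aibrix.ai/supported-request-types": "embeddings",
	}))
	// No label: the default completions routes are generated.
	fmt.Println(resolvePathPrefixes(map[string]string{
		"model.aibrix.ai/name": "llama2-7b",
	}))
}
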