From d750fee85d737e345f49bbadd59c5499a2457340 Mon Sep 17 00:00:00 2001
From: Yossi Ovadia
Date: Tue, 14 Oct 2025 12:32:25 -0700
Subject: [PATCH] fix: update response model field to match routing decision

The response JSON model field now correctly reflects the semantic
router's decision instead of using the model name from the vLLM
endpoint.

Changes:
- Parse response JSON and update model field to ctx.RequestModel
- Re-marshal modified response for cache and client
- Only modify non-streaming responses
- Fallback to original response on marshal errors

This ensures API consumers can determine which model was selected by
examining the standard model field, rather than requiring custom
headers or log inspection.

Fixes #430

Co-Authored-By: Claude
Signed-off-by: Yossi Ovadia
---
 .../pkg/extproc/response_handler.go | 30 ++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/semantic-router/pkg/extproc/response_handler.go b/src/semantic-router/pkg/extproc/response_handler.go
index ab5fc4fe..633e04b2 100644
--- a/src/semantic-router/pkg/extproc/response_handler.go
+++ b/src/semantic-router/pkg/extproc/response_handler.go
@@ -217,6 +217,25 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
 		observability.Errorf("Error parsing tokens from response: %v", err)
 		metrics.RecordRequestError(ctx.RequestModel, "parse_error")
 	}
+
+	// Update the model field in the response to match the routing decision
+	// This ensures the API response reflects which model was selected by the semantic router
+	modelFieldUpdated := false
+	if ctx.RequestModel != "" && parsed.Model != ctx.RequestModel {
+		observability.Infof("Updating response model field from '%s' to '%s' (routing decision)", parsed.Model, ctx.RequestModel)
+		parsed.Model = ctx.RequestModel
+
+		// Re-marshal the response with the updated model field
+		modifiedBody, err := json.Marshal(parsed)
+		if err != nil {
+			observability.Errorf("Error re-marshaling response with updated model field: %v", err)
+			// Fall back to original response body on error
+		} else {
+			responseBody = modifiedBody
+			modelFieldUpdated = true
+		}
+	}
+
 	promptTokens := int(parsed.Usage.PromptTokens)
 	completionTokens := int(parsed.Usage.CompletionTokens)
 
@@ -281,7 +300,7 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
 		}
 	}
 
-	// Allow the response to continue without modification
+	// Return the response (with modified model field if updated)
 	response := &ext_proc.ProcessingResponse{
 		Response: &ext_proc.ProcessingResponse_ResponseBody{
 			ResponseBody: &ext_proc.BodyResponse{
@@ -292,5 +311,14 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
 		},
 	}
 
+	// If we updated the model field, include the modified body in the response
+	if modelFieldUpdated {
+		response.GetResponseBody().Response.BodyMutation = &ext_proc.BodyMutation{
+			Mutation: &ext_proc.BodyMutation_Body{
+				Body: responseBody,
+			},
+		}
+	}
+
 	return response, nil
 }