Skip to content

Commit d750fee

Browse files
yossiovadia and claude committed
fix: update response model field to match routing decision
The response JSON model field now correctly reflects the semantic router's decision instead of using the model name from the vLLM endpoint.

Changes:
- Parse response JSON and update model field to ctx.RequestModel
- Re-marshal modified response for cache and client
- Only modify non-streaming responses
- Fall back to original response on marshal errors

This ensures API consumers can determine which model was selected by examining the standard model field, rather than requiring custom headers or log inspection.

Fixes #430

Co-Authored-By: Claude <[email protected]>
Signed-off-by: Yossi Ovadia <[email protected]>
1 parent 45240d8 commit d750fee

File tree

1 file changed

+29
-1
lines changed

1 file changed

+29
-1
lines changed

src/semantic-router/pkg/extproc/response_handler.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,25 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
217217
observability.Errorf("Error parsing tokens from response: %v", err)
218218
metrics.RecordRequestError(ctx.RequestModel, "parse_error")
219219
}
220+
221+
// Update the model field in the response to match the routing decision
222+
// This ensures the API response reflects which model was selected by the semantic router
223+
modelFieldUpdated := false
224+
if ctx.RequestModel != "" && parsed.Model != ctx.RequestModel {
225+
observability.Infof("Updating response model field from '%s' to '%s' (routing decision)", parsed.Model, ctx.RequestModel)
226+
parsed.Model = ctx.RequestModel
227+
228+
// Re-marshal the response with the updated model field
229+
modifiedBody, err := json.Marshal(parsed)
230+
if err != nil {
231+
observability.Errorf("Error re-marshaling response with updated model field: %v", err)
232+
// Fall back to original response body on error
233+
} else {
234+
responseBody = modifiedBody
235+
modelFieldUpdated = true
236+
}
237+
}
238+
220239
promptTokens := int(parsed.Usage.PromptTokens)
221240
completionTokens := int(parsed.Usage.CompletionTokens)
222241

@@ -281,7 +300,7 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
281300
}
282301
}
283302

284-
// Allow the response to continue without modification
303+
// Return the response (with modified model field if updated)
285304
response := &ext_proc.ProcessingResponse{
286305
Response: &ext_proc.ProcessingResponse_ResponseBody{
287306
ResponseBody: &ext_proc.BodyResponse{
@@ -292,5 +311,14 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
292311
},
293312
}
294313

314+
// If we updated the model field, include the modified body in the response
315+
if modelFieldUpdated {
316+
response.GetResponseBody().Response.BodyMutation = &ext_proc.BodyMutation{
317+
Mutation: &ext_proc.BodyMutation_Body{
318+
Body: responseBody,
319+
},
320+
}
321+
}
322+
295323
return response, nil
296324
}

0 commit comments

Comments
 (0)