-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Labels
P2: Medium (Medium priority - fix when possible), performance (Performance improvement)
Description
Problem
In src/routes/chat.rs:18-30, the request body undergoes redundant serialization:
let request_body = read_body_with_limit(body, state.config.max_audio_request_size).await?;
let mut request_json: serde_json::Value = serde_json::from_slice(&request_body)?;
strip_empty_tool_calls(&mut request_json);
let modified_body = serde_json::to_vec(&request_json)?;
This sequence:
- Reads bytes from client
- Parses as JSON
- Modifies (strips empty tool_calls)
- Serializes back to bytes
- Sends to backend
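However, if strip_empty_tool_calls finds nothing to strip (the common case), the re-serialization in step 4 is wasted CPU work: the original bytes could be forwarded to the backend unchanged. Parsing (step 2) and the strip pass (step 3) are still needed to find out whether anything has to be removed.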
Impact
Medium - Adds unnecessary latency and CPU usage to every chat completion request, even when no modification is needed.
Solution
Track whether modifications were actually made:
// NOTE(review): the size limit passed here is `max_audio_request_size` although this is
// the chat route — confirm this is the intended limit.
let request_body = read_body_with_limit(body, state.config.max_audio_request_size).await?;
// Parsing is still required: the messages have to be inspected for empty `tool_calls`.
let mut request_json: serde_json::Value = serde_json::from_slice(&request_body)?;
// `was_modified` is true only when at least one empty `tool_calls` array was removed.
let was_modified = strip_empty_tool_calls(&mut request_json);
// Re-serialize only when the JSON tree actually changed; otherwise reuse the bytes
// received from the client.
// NOTE(review): both branches must have the same type. `serde_json::to_vec` yields
// `Vec<u8>`; if `read_body_with_limit` returns a different buffer type, one branch needs
// a conversion — confirm against its signature.
let final_body = if was_modified {
serde_json::to_vec(&request_json)?
} else {
request_body // Use original bytes
};
// Update strip_empty_tool_calls to return bool:
/// Removes the `tool_calls` key from every object in `payload["messages"]` whose
/// `tool_calls` value is an empty array.
///
/// Returns `true` if at least one key was removed and `false` otherwise, including when
/// `payload` has no `messages` array at all. Callers can use the flag to skip
/// re-serializing an unchanged payload.
fn strip_empty_tool_calls(payload: &mut serde_json::Value) -> bool {
    let messages = payload
        .get_mut("messages")
        .and_then(serde_json::Value::as_array_mut);

    let mut removed_any = false;
    // Entries that are not JSON objects are skipped; a missing `messages` array yields
    // an empty iteration.
    for entry in messages
        .into_iter()
        .flatten()
        .filter_map(serde_json::Value::as_object_mut)
    {
        let has_empty_calls = matches!(
            entry.get("tool_calls"),
            Some(serde_json::Value::Array(calls)) if calls.is_empty()
        );
        if has_empty_calls {
            entry.remove("tool_calls");
            removed_any = true;
        }
    }
    removed_any
}
Benchmark
Add a benchmark to quantify the improvement:
// benches/serialization.rs
/// Baseline: parse the request, run the strip pass, and always serialize the JSON tree
/// back to bytes, even though this payload contains nothing to strip.
#[bench]
fn bench_chat_with_strip(b: &mut Bencher) {
    let request = r#"{"messages":[{"role":"user","content":"hi"}]}"#;
    let raw = request.as_bytes();
    b.iter(|| {
        let mut parsed: Value = serde_json::from_slice(raw).unwrap();
        strip_empty_tool_calls(&mut parsed);
        let reserialized = serde_json::to_vec(&parsed).unwrap();
        reserialized
    });
}
/// Optimized path: parse the request, run the strip pass, and re-serialize only when the
/// pass reports that it removed something; otherwise reuse the original bytes.
///
/// This exercises the proposed bool-returning `strip_empty_tool_calls` directly, so the
/// benchmark measures the same code path as the handler.
#[bench]
fn bench_chat_without_strip(b: &mut Bencher) {
    let request = r#"{"messages":[{"role":"user","content":"hi"}]}"#;
    b.iter(|| {
        let mut json: Value = serde_json::from_slice(request.as_bytes()).unwrap();
        if strip_empty_tool_calls(&mut json) {
            serde_json::to_vec(&json).unwrap()
        } else {
            // The handler would move the already-owned request buffer here; the copy
            // stands in for that so both branches return owned bytes.
            request.as_bytes().to_vec()
        }
    });
}
File Location
src/routes/chat.rs:16-54
Related
Similar optimization could apply to other JSON endpoints if they add request modification in the future.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
P2: Medium (Medium priority - fix when possible), performance (Performance improvement)