Skip to content

Commit f3b3390

Browse files
lenoxys and Copilot authored
Optimize Dependencies, Enhance Performance, and Improve Code Quality (#18)
* fix: handle optional stream value in chat request logging * refactor: update application state and client initialization to accept direct configuration parameters * refactor(chat): optimize security client usage by removing unnecessary cloning Replace multiple clones of SecurityClient with direct mutation using with_user_ip method. This improves performance by avoiding redundant cloning while maintaining the same security checks functionality. * perf(chat): optimize JSON parsing and serialization This commit improves performance in the chat handler by optimizing JSON operations: 1. Eliminate double JSON parsing: - Parse response body once into serde_json::Value - Reuse the same parsed value for metrics logging and ChatResponse - Reduces redundant parsing operations per request 2. Optimize response serialization: - Replace serde_json::to_vec with serde_json::to_writer - Write directly to Vec<u8> buffer to minimize allocations - Streamline response construction flow The changes reduce memory allocations and CPU usage, particularly under high load with many concurrent requests. No behavioral changes, purely performance focused. 
Impact: - Reduces memory allocations in the response path - Eliminates redundant JSON parsing - More efficient serialization for masked content responses - Cleaner error handling flow * fix(logging): include version in server startup log message * Bump version to 0.14.0 and update dependencies - Updated axum from 0.7.4 to 0.8.4 - Updated tower-http from 0.5.1 to 0.6.6 - Updated reqwest from 0.11.24 to 0.12.23 - Updated serde from 1.0.197 to 1.0.219 - Updated serde_yaml from 0.9.31 to 0.9.34 - Updated uuid from 1.7.0 to 1.18.0 - Updated bytes from 1.5.0 to 1.10.1 - Updated http-body-util from 0.1.0 to 0.1.3 - Updated tracing-subscriber from 0.3.18 to 0.3.18 (no change) - Updated futures-util from 0.3.30 to 0.3 (no change) - Updated pin-project from 1.1.3 to 1.1.3 (no change) * Optimize Rust application's library usage and improve code quality (#17) * Initial plan * Fix clippy warnings and optimize dependencies for better performance Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> * Further optimize dependencies and improve memory efficiency patterns Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> * refactor(chat): simplify state handling and clone security client for IP configuration * fix(chat): optimize response serialization and streamline streaming request handling --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
1 parent ba3d52d commit f3b3390

File tree

14 files changed

+490
-468
lines changed

14 files changed

+490
-468
lines changed

Cargo.lock

Lines changed: 387 additions & 366 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
11
[package]
22
name = "panw-api-ollama"
3-
version = "0.13.0"
3+
version = "0.14.0"
44
edition = "2021"
55

66
[dependencies]
7-
axum = "0.7.4"
8-
tokio = { version = "1.36.0", features = ["full"] }
9-
tower-http = { version = "0.5.1", features = ["trace"] }
10-
reqwest = { version = "0.11.24", features = ["json", "stream"] }
11-
serde = { version = "1.0.197", features = ["derive"] }
7+
axum = "0.8.4"
8+
tokio = { version = "1.36.0", features = ["rt-multi-thread", "net", "signal", "macros", "time"] }
9+
tower-http = { version = "0.6.6", features = ["trace"] }
10+
reqwest = { version = "0.12.23", features = ["json", "stream"] }
11+
serde = { version = "1.0.219", features = ["derive"] }
1212
serde_json = "1.0.114"
13-
serde_yaml = "0.9.31"
14-
uuid = { version = "1.7.0", features = ["serde", "v4"] }
13+
serde_yml = "0.0.12"
14+
uuid = { version = "1.18.0", features = ["v4", "serde"] }
1515
futures-util = "0.3.30"
1616
tracing = "0.1.40"
17-
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
17+
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "fmt"] }
1818
thiserror = "1.0.57"
19-
bytes = "1.5.0"
19+
bytes = "1.10.1"
2020
async-stream = "0.3.5"
21-
http-body-util = "0.1.0"
22-
chrono = { version = "0.4", features = ["serde"] }
23-
futures = "0.3"
21+
http-body-util = "0.1.3"
22+
chrono = { version = "0.4", features = ["serde", "clock"], default-features = false }
2423
pin-project = "1.1.3"

src/config.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ use tracing::{debug, info};
2424
/// configuration, including file access errors, YAML parsing issues,
2525
/// and validation of configuration values.
2626
#[derive(Debug, Error)]
27+
#[allow(clippy::enum_variant_names)]
2728
pub enum ConfigError {
2829
/// File I/O errors when reading the configuration file
2930
#[error("Failed to read config file: {0}")]
3031
IoError(#[from] std::io::Error),
3132

3233
/// YAML parsing errors in the configuration file
3334
#[error("Failed to parse config file: {0}")]
34-
ParseError(#[from] serde_yaml::Error),
35+
ParseError(#[from] serde_yml::Error),
3536

3637
/// Configuration validation errors
3738
#[error("Validation error: {0}")]
@@ -177,7 +178,7 @@ pub fn load_config(path: &str) -> Result<Config, ConfigError> {
177178
debug!("Successfully read configuration file");
178179

179180
// Parse YAML
180-
let mut config: Config = serde_yaml::from_str(&content)?;
181+
let mut config: Config = serde_yml::from_str(&content)?;
181182
debug!("Successfully parsed YAML configuration");
182183

183184
// Override with environment variables if present

src/handlers/chat.rs

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ pub async fn handle_chat(
6565
info!("Received chat request for model: {}", request.model);
6666
debug!(
6767
"Chat request details: stream={}, messages={}, client_ip={}",
68-
request.stream.unwrap(),
68+
request.stream.unwrap_or(false),
6969
request.messages.len(),
7070
addr.ip()
7171
);
7272

73-
// Configure security client with user's IP
73+
// Clone security client and configure with user's IP
7474
let mut security_client = state.security_client.clone();
7575
security_client.with_user_ip(addr.ip().to_string());
7676

@@ -178,18 +178,22 @@ async fn handle_non_streaming_chat(
178178
ApiError::InternalError("Failed to read response body".to_string())
179179
})?;
180180

181-
// Parse response
182-
let mut response_body: ChatResponse = serde_json::from_slice(&body_bytes).map_err(|e| {
181+
// Parse response once into Value
182+
let json_value: serde_json::Value = serde_json::from_slice(&body_bytes).map_err(|e| {
183183
error!("Failed to parse response: {}", e);
184184
ApiError::InternalError("Failed to parse response".to_string())
185185
})?;
186186

187187
debug!("Received response from Ollama, performing security assessment");
188188

189-
// Extract and log performance metrics if available
190-
if let Ok(json) = serde_json::from_slice::<serde_json::Value>(&body_bytes) {
191-
log_llm_metrics(&json, false);
192-
}
189+
// Extract and log performance metrics
190+
log_llm_metrics(&json_value, false);
191+
192+
// Convert to ChatResponse
193+
let mut response_body: ChatResponse = serde_json::from_value(json_value).map_err(|e| {
194+
error!("Failed to convert response: {}", e);
195+
ApiError::InternalError("Failed to convert response".to_string())
196+
})?;
193197

194198
// Security assessment on response content
195199
let assessment = state
@@ -204,20 +208,20 @@ async fn handle_non_streaming_chat(
204208
}
205209

206210
// If we have masked content, use it
207-
let output_bytes = if assessment.is_masked {
211+
let response = if assessment.is_masked {
208212
response_body.message.content = assessment.final_content;
209213
info!("Chat response passed security checks (with masked content), returning to client");
210-
serde_json::to_vec(&response_body)
211-
.map(Bytes::from)
212-
.map_err(|e| {
213-
error!("Failed to serialize modified response: {}", e);
214-
ApiError::InternalError("Failed to serialize response".to_string())
215-
})?
214+
215+
let json_bytes = serde_json::to_vec(&response_body).map_err(|e| {
216+
error!("Failed to serialize modified response: {}", e);
217+
ApiError::InternalError("Failed to serialize response".to_string())
218+
})?;
219+
build_json_response(Bytes::from(json_bytes))?
216220
} else {
217221
info!("Chat response passed security checks, returning to client");
218-
body_bytes
222+
build_json_response(body_bytes)?
219223
};
220-
Ok(build_json_response(output_bytes)?)
224+
Ok(response)
221225
}
222226

223227
// Handles streaming chat requests using the generic streaming handler.
@@ -242,12 +246,5 @@ async fn handle_streaming_chat(
242246

243247
let model = request.model.clone();
244248
// For streaming chat, we're dealing with responses from the LLM, so is_prompt should be false
245-
handle_streaming_request::<ChatRequest, ChatResponse>(
246-
&state,
247-
request,
248-
"/api/chat",
249-
&model,
250-
false,
251-
)
252-
.await
249+
handle_streaming_request::<ChatRequest>(&state, request, "/api/chat", &model, false).await
253250
}

src/handlers/embeddings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,5 @@ pub async fn handle_embeddings(
3939
.bytes()
4040
.await
4141
.map_err(|e| ApiError::InternalError(e.to_string()))?;
42-
Ok(build_json_response(body_bytes)?)
42+
build_json_response(body_bytes)
4343
}

src/handlers/generate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ async fn handle_non_streaming_generate(
149149
}
150150

151151
// Return safe response
152-
Ok(build_json_response(body_bytes)?)
152+
build_json_response(body_bytes)
153153
}
154154

155155
// Handles streaming generate requests.
@@ -171,7 +171,7 @@ async fn handle_streaming_generate(
171171

172172
let model = request.model.clone();
173173
// For streaming generate, we're dealing with responses from the LLM, so is_prompt should be false
174-
handle_streaming_request::<GenerateRequest, GenerateResponse>(
174+
handle_streaming_request::<GenerateRequest>(
175175
&state,
176176
request,
177177
"/api/generate",

src/handlers/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod version;
2020
// security assessment, and internal server issues into a unified error type
2121
// that can be converted into appropriate HTTP responses.
2222
#[derive(Debug, thiserror::Error)]
23+
#[allow(clippy::enum_variant_names)]
2324
pub enum ApiError {
2425
// Errors from the Ollama backend service.
2526
//

src/handlers/models.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ async fn forward_to_ollama<T: Serialize>(
103103
.bytes()
104104
.await
105105
.map_err(|e| ApiError::InternalError(e.to_string()))?;
106-
Ok(build_json_response(body_bytes)?)
106+
build_json_response(body_bytes)
107107
}
108108

109109
// Handler for listing models (GET /api/tags)

src/handlers/utils.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use axum::{body::Body, response::Response};
44
use bytes::Bytes;
55
use futures_util::stream::StreamExt;
66
use http_body_util::StreamBody;
7-
use serde::{de::DeserializeOwned, Serialize};
7+
use serde::Serialize;
88
use tracing::{error, info};
99

1010
// Builds an HTTP response with JSON content type from the provided bytes.
@@ -21,7 +21,7 @@ fn convert_stream_error(err: reqwest::Error) -> reqwest::Error {
2121
}
2222

2323
// Handles streaming requests to API endpoints, applying security assessment to the streamed responses.
24-
pub async fn handle_streaming_request<T, R>(
24+
pub async fn handle_streaming_request<T>(
2525
state: &AppState,
2626
request: T,
2727
endpoint: &str,
@@ -30,7 +30,6 @@ pub async fn handle_streaming_request<T, R>(
3030
) -> Result<Response<Body>, ApiError>
3131
where
3232
T: Serialize + Send + 'static,
33-
R: DeserializeOwned + Serialize + Send + Sync + Unpin + 'static,
3433
{
3534
// Get the original stream from ollama client
3635
let stream = state.ollama_client.stream(endpoint, &request).await?;
@@ -55,16 +54,14 @@ where
5554
Err(e) => {
5655
error!("Error in security assessment stream: {:?}", e);
5756
// Convert error to a user-friendly message
58-
let error_message = match e {
59-
_ => "Error processing response",
60-
};
57+
const ERROR_MESSAGE: &str = "Error processing response";
6158
let error_json = serde_json::json!({
6259
"model": model_string,
63-
"error": error_message,
60+
"error": ERROR_MESSAGE,
6461
"done": true
6562
});
6663
let error_bytes = serde_json::to_vec(&error_json)
67-
.unwrap_or_else(|_| error_message.as_bytes().to_vec());
64+
.unwrap_or_else(|_| ERROR_MESSAGE.as_bytes().to_vec());
6865
Ok(Bytes::from(error_bytes))
6966
}
7067
});

src/handlers/version.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ pub async fn handle_version(State(state): State<AppState>) -> Result<Response, A
1313
.await
1414
.map_err(|e| ApiError::InternalError(e.to_string()))?;
1515

16-
Ok(build_json_response(body_bytes)?)
16+
build_json_response(body_bytes)
1717
}

0 commit comments

Comments
 (0)