Skip to content

Commit f3b3390

Browse files
lenoxys and Copilot authored
Optimize Dependencies, Enhance Performance, and Improve Code Quality (#18)
* fix: handle optional stream value in chat request logging * refactor: update application state and client initialization to accept direct configuration parameters * refactor(chat): optimize security client usage by removing unnecessary cloning Replace multiple clones of SecurityClient with direct mutation using with_user_ip method. This improves performance by avoiding redundant cloning while maintaining the same security checks functionality. * perf(chat): optimize JSON parsing and serialization This commit improves performance in the chat handler by optimizing JSON operations: 1. Eliminate double JSON parsing: - Parse response body once into serde_json::Value - Reuse the same parsed value for metrics logging and ChatResponse - Reduces redundant parsing operations per request 2. Optimize response serialization: - Replace serde_json::to_vec with serde_json::to_writer - Write directly to Vec<u8> buffer to minimize allocations - Streamline response construction flow The changes reduce memory allocations and CPU usage, particularly under high load with many concurrent requests. No behavioral changes, purely performance focused. 
Impact: - Reduces memory allocations in the response path - Eliminates redundant JSON parsing - More efficient serialization for masked content responses - Cleaner error handling flow * fix(logging): include version in server startup log message * Bump version to 0.14.0 and update dependencies - Updated axum from 0.7.4 to 0.8.4 - Updated tower-http from 0.5.1 to 0.6.6 - Updated reqwest from 0.11.24 to 0.12.23 - Updated serde from 1.0.197 to 1.0.219 - Updated serde_yaml from 0.9.31 to 0.9.34 - Updated uuid from 1.7.0 to 1.18.0 - Updated bytes from 1.5.0 to 1.10.1 - Updated http-body-util from 0.1.0 to 0.1.3 - Updated tracing-subscriber from 0.3.18 to 0.3.18 (no change) - Updated futures-util from 0.3.30 to 0.3 (no change) - Updated pin-project from 1.1.3 to 1.1.3 (no change) * Optimize Rust application's library usage and improve code quality (#17) * Initial plan * Fix clippy warnings and optimize dependencies for better performance Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> * Further optimize dependencies and improve memory efficiency patterns Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: lenoxys <3996456+lenoxys@users.noreply.github.com> * refactor(chat): simplify state handling and clone security client for IP configuration * fix(chat): optimize response serialization and streamline streaming request handling --------- Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
1 parent ba3d52d commit f3b3390

File tree

14 files changed

+490
-468
lines changed

14 files changed

+490
-468
lines changed

Cargo.lock

Lines changed: 387 additions & 366 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,23 @@
11
[package]
22
name = "panw-api-ollama"
3-
version = "0.13.0"
3+
version = "0.14.0"
44
edition = "2021"
55

66
[dependencies]
7-
axum = "0.7.4"
8-
tokio = { version = "1.36.0", features = ["full"] }
9-
tower-http = { version = "0.5.1", features = ["trace"] }
10-
reqwest = { version = "0.11.24", features = ["json", "stream"] }
11-
serde = { version = "1.0.197", features = ["derive"] }
7+
axum = "0.8.4"
8+
tokio = { version = "1.36.0", features = ["rt-multi-thread", "net", "signal", "macros", "time"] }
9+
tower-http = { version = "0.6.6", features = ["trace"] }
10+
reqwest = { version = "0.12.23", features = ["json", "stream"] }
11+
serde = { version = "1.0.219", features = ["derive"] }
1212
serde_json = "1.0.114"
13-
serde_yaml = "0.9.31"
14-
uuid = { version = "1.7.0", features = ["serde", "v4"] }
13+
serde_yml = "0.0.12"
14+
uuid = { version = "1.18.0", features = ["v4", "serde"] }
1515
futures-util = "0.3.30"
1616
tracing = "0.1.40"
17-
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
17+
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "fmt"] }
1818
thiserror = "1.0.57"
19-
bytes = "1.5.0"
19+
bytes = "1.10.1"
2020
async-stream = "0.3.5"
21-
http-body-util = "0.1.0"
22-
chrono = { version = "0.4", features = ["serde"] }
23-
futures = "0.3"
21+
http-body-util = "0.1.3"
22+
chrono = { version = "0.4", features = ["serde", "clock"], default-features = false }
2423
pin-project = "1.1.3"

src/config.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ use tracing::{debug, info};
2424
/// configuration, including file access errors, YAML parsing issues,
2525
/// and validation of configuration values.
2626
#[derive(Debug, Error)]
27+
#[allow(clippy::enum_variant_names)]
2728
pub enum ConfigError {
2829
/// File I/O errors when reading the configuration file
2930
#[error("Failed to read config file: {0}")]
3031
IoError(#[from] std::io::Error),
3132

3233
/// YAML parsing errors in the configuration file
3334
#[error("Failed to parse config file: {0}")]
34-
ParseError(#[from] serde_yaml::Error),
35+
ParseError(#[from] serde_yml::Error),
3536

3637
/// Configuration validation errors
3738
#[error("Validation error: {0}")]
@@ -177,7 +178,7 @@ pub fn load_config(path: &str) -> Result<Config, ConfigError> {
177178
debug!("Successfully read configuration file");
178179

179180
// Parse YAML
180-
let mut config: Config = serde_yaml::from_str(&content)?;
181+
let mut config: Config = serde_yml::from_str(&content)?;
181182
debug!("Successfully parsed YAML configuration");
182183

183184
// Override with environment variables if present

src/handlers/chat.rs

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ pub async fn handle_chat(
6565
info!("Received chat request for model: {}", request.model);
6666
debug!(
6767
"Chat request details: stream={}, messages={}, client_ip={}",
68-
request.stream.unwrap(),
68+
request.stream.unwrap_or(false),
6969
request.messages.len(),
7070
addr.ip()
7171
);
7272

73-
// Configure security client with user's IP
73+
// Clone security client and configure with user's IP
7474
let mut security_client = state.security_client.clone();
7575
security_client.with_user_ip(addr.ip().to_string());
7676

@@ -178,18 +178,22 @@ async fn handle_non_streaming_chat(
178178
ApiError::InternalError("Failed to read response body".to_string())
179179
})?;
180180

181-
// Parse response
182-
let mut response_body: ChatResponse = serde_json::from_slice(&body_bytes).map_err(|e| {
181+
// Parse response once into Value
182+
let json_value: serde_json::Value = serde_json::from_slice(&body_bytes).map_err(|e| {
183183
error!("Failed to parse response: {}", e);
184184
ApiError::InternalError("Failed to parse response".to_string())
185185
})?;
186186

187187
debug!("Received response from Ollama, performing security assessment");
188188

189-
// Extract and log performance metrics if available
190-
if let Ok(json) = serde_json::from_slice::<serde_json::Value>(&body_bytes) {
191-
log_llm_metrics(&json, false);
192-
}
189+
// Extract and log performance metrics
190+
log_llm_metrics(&json_value, false);
191+
192+
// Convert to ChatResponse
193+
let mut response_body: ChatResponse = serde_json::from_value(json_value).map_err(|e| {
194+
error!("Failed to convert response: {}", e);
195+
ApiError::InternalError("Failed to convert response".to_string())
196+
})?;
193197

194198
// Security assessment on response content
195199
let assessment = state
@@ -204,20 +208,20 @@ async fn handle_non_streaming_chat(
204208
}
205209

206210
// If we have masked content, use it
207-
let output_bytes = if assessment.is_masked {
211+
let response = if assessment.is_masked {
208212
response_body.message.content = assessment.final_content;
209213
info!("Chat response passed security checks (with masked content), returning to client");
210-
serde_json::to_vec(&response_body)
211-
.map(Bytes::from)
212-
.map_err(|e| {
213-
error!("Failed to serialize modified response: {}", e);
214-
ApiError::InternalError("Failed to serialize response".to_string())
215-
})?
214+
215+
let json_bytes = serde_json::to_vec(&response_body).map_err(|e| {
216+
error!("Failed to serialize modified response: {}", e);
217+
ApiError::InternalError("Failed to serialize response".to_string())
218+
})?;
219+
build_json_response(Bytes::from(json_bytes))?
216220
} else {
217221
info!("Chat response passed security checks, returning to client");
218-
body_bytes
222+
build_json_response(body_bytes)?
219223
};
220-
Ok(build_json_response(output_bytes)?)
224+
Ok(response)
221225
}
222226

223227
// Handles streaming chat requests using the generic streaming handler.
@@ -242,12 +246,5 @@ async fn handle_streaming_chat(
242246

243247
let model = request.model.clone();
244248
// For streaming chat, we're dealing with responses from the LLM, so is_prompt should be false
245-
handle_streaming_request::<ChatRequest, ChatResponse>(
246-
&state,
247-
request,
248-
"/api/chat",
249-
&model,
250-
false,
251-
)
252-
.await
249+
handle_streaming_request::<ChatRequest>(&state, request, "/api/chat", &model, false).await
253250
}

src/handlers/embeddings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,5 @@ pub async fn handle_embeddings(
3939
.bytes()
4040
.await
4141
.map_err(|e| ApiError::InternalError(e.to_string()))?;
42-
Ok(build_json_response(body_bytes)?)
42+
build_json_response(body_bytes)
4343
}

src/handlers/generate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ async fn handle_non_streaming_generate(
149149
}
150150

151151
// Return safe response
152-
Ok(build_json_response(body_bytes)?)
152+
build_json_response(body_bytes)
153153
}
154154

155155
// Handles streaming generate requests.
@@ -171,7 +171,7 @@ async fn handle_streaming_generate(
171171

172172
let model = request.model.clone();
173173
// For streaming generate, we're dealing with responses from the LLM, so is_prompt should be false
174-
handle_streaming_request::<GenerateRequest, GenerateResponse>(
174+
handle_streaming_request::<GenerateRequest>(
175175
&state,
176176
request,
177177
"/api/generate",

src/handlers/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod version;
2020
// security assessment, and internal server issues into a unified error type
2121
// that can be converted into appropriate HTTP responses.
2222
#[derive(Debug, thiserror::Error)]
23+
#[allow(clippy::enum_variant_names)]
2324
pub enum ApiError {
2425
// Errors from the Ollama backend service.
2526
//

src/handlers/models.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ async fn forward_to_ollama<T: Serialize>(
103103
.bytes()
104104
.await
105105
.map_err(|e| ApiError::InternalError(e.to_string()))?;
106-
Ok(build_json_response(body_bytes)?)
106+
build_json_response(body_bytes)
107107
}
108108

109109
// Handler for listing models (GET /api/tags)

src/handlers/utils.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use axum::{body::Body, response::Response};
44
use bytes::Bytes;
55
use futures_util::stream::StreamExt;
66
use http_body_util::StreamBody;
7-
use serde::{de::DeserializeOwned, Serialize};
7+
use serde::Serialize;
88
use tracing::{error, info};
99

1010
// Builds an HTTP response with JSON content type from the provided bytes.
@@ -21,7 +21,7 @@ fn convert_stream_error(err: reqwest::Error) -> reqwest::Error {
2121
}
2222

2323
// Handles streaming requests to API endpoints, applying security assessment to the streamed responses.
24-
pub async fn handle_streaming_request<T, R>(
24+
pub async fn handle_streaming_request<T>(
2525
state: &AppState,
2626
request: T,
2727
endpoint: &str,
@@ -30,7 +30,6 @@ pub async fn handle_streaming_request<T, R>(
3030
) -> Result<Response<Body>, ApiError>
3131
where
3232
T: Serialize + Send + 'static,
33-
R: DeserializeOwned + Serialize + Send + Sync + Unpin + 'static,
3433
{
3534
// Get the original stream from ollama client
3635
let stream = state.ollama_client.stream(endpoint, &request).await?;
@@ -55,16 +54,14 @@ where
5554
Err(e) => {
5655
error!("Error in security assessment stream: {:?}", e);
5756
// Convert error to a user-friendly message
58-
let error_message = match e {
59-
_ => "Error processing response",
60-
};
57+
const ERROR_MESSAGE: &str = "Error processing response";
6158
let error_json = serde_json::json!({
6259
"model": model_string,
63-
"error": error_message,
60+
"error": ERROR_MESSAGE,
6461
"done": true
6562
});
6663
let error_bytes = serde_json::to_vec(&error_json)
67-
.unwrap_or_else(|_| error_message.as_bytes().to_vec());
64+
.unwrap_or_else(|_| ERROR_MESSAGE.as_bytes().to_vec());
6865
Ok(Bytes::from(error_bytes))
6966
}
7067
});

src/handlers/version.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ pub async fn handle_version(State(state): State<AppState>) -> Result<Response, A
1313
.await
1414
.map_err(|e| ApiError::InternalError(e.to_string()))?;
1515

16-
Ok(build_json_response(body_bytes)?)
16+
build_json_response(body_bytes)
1717
}

0 commit comments

Comments
 (0)