diff --git a/src/database/analytics_repo.rs b/src/database/analytics_repo.rs index 8f91c9d..70475ad 100644 --- a/src/database/analytics_repo.rs +++ b/src/database/analytics_repo.rs @@ -1,7 +1,6 @@ use anyhow::{Context, Result as AnyhowResult}; -use chrono::{DateTime, Duration, Utc}; -use sqlx::{Pool, Row, Sqlite}; -use std::collections::HashMap; +use chrono::{DateTime, Utc}; +use sqlx::{Pool, Sqlite}; use super::connection::DatabaseManager; use crate::models::Analytics; @@ -17,372 +16,6 @@ impl AnalyticsRepository { } } - pub async fn get_daily_usage_stats( - &self, - date: DateTime, - ) -> AnyhowResult { - let start_of_day = date.date_naive().and_hms_opt(0, 0, 0).unwrap().and_utc(); - let end_of_day = start_of_day + Duration::days(1); - - // Get total sessions - let total_sessions: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM chat_sessions WHERE created_at >= ? AND created_at < ?", - ) - .bind(start_of_day.to_rfc3339()) - .bind(end_of_day.to_rfc3339()) - .fetch_one(&self.pool) - .await - .context("Failed to get total sessions")?; - - // Get total messages - let total_messages: i64 = sqlx::query_scalar( - r#" - SELECT COUNT(*) FROM messages m - JOIN chat_sessions cs ON m.session_id = cs.id - WHERE cs.created_at >= ? AND cs.created_at < ? - "#, - ) - .bind(start_of_day.to_rfc3339()) - .bind(end_of_day.to_rfc3339()) - .fetch_one(&self.pool) - .await - .context("Failed to get total messages")?; - - // Get total tokens - let total_tokens: i64 = sqlx::query_scalar( - "SELECT COALESCE(SUM(token_count), 0) FROM chat_sessions WHERE created_at >= ? AND created_at < ?" - ) - .bind(start_of_day.to_rfc3339()) - .bind(end_of_day.to_rfc3339()) - .fetch_one(&self.pool) - .await - .context("Failed to get total tokens")?; - - // Get provider usage - let provider_rows = sqlx::query( - "SELECT provider, COUNT(*) as count FROM chat_sessions WHERE created_at >= ? AND created_at < ? GROUP BY provider" - ) - .bind(start_of_day.to_rfc3339()) - .bind(end_of_day.to_rfc3339()) - .fetch_all(&self.pool) - .await - .context("Failed to get provider usage")?; - - let mut provider_usage = HashMap::new(); - for row in provider_rows { - let provider: String = row.try_get("provider")?; - let count: i64 = row.try_get("count")?; - provider_usage.insert(provider, count as u32); - } - - // Get hourly distribution - let hourly_rows = sqlx::query( - r#" - SELECT strftime('%H', created_at) as hour, COUNT(*) as count - FROM chat_sessions - WHERE created_at >= ? AND created_at < ? - GROUP BY hour - ORDER BY hour - "#, - ) - .bind(start_of_day.to_rfc3339()) - .bind(end_of_day.to_rfc3339()) - .fetch_all(&self.pool) - .await - .context("Failed to get hourly distribution")?; - - let mut hourly_distribution = vec![0u32; 24]; - for row in hourly_rows { - let hour_str: String = row.try_get("hour")?; - let count: i64 = row.try_get("count")?; - if let Ok(hour) = hour_str.parse::() { - if (hour as usize) < 24 { - hourly_distribution[hour as usize] = count as u32; - } - } - } - - Ok(DailyUsageStats { - date, - total_sessions: total_sessions as u32, - total_messages: total_messages as u32, - total_tokens: total_tokens as u64, - provider_usage, - hourly_distribution, - avg_session_length: if total_sessions > 0 { - total_messages as f64 / total_sessions as f64 - } else { - 0.0 - }, - avg_tokens_per_session: if total_sessions > 0 { - total_tokens as f64 / total_sessions as f64 - } else { - 0.0 - }, - }) - } - - pub async fn get_provider_usage_trends(&self, days: u32) -> AnyhowResult> { - let end_date = Utc::now(); - let start_date = end_date - Duration::days(days as i64); - - let trend_rows = sqlx::query( - r#" - SELECT provider, - DATE(created_at) as date, - COUNT(*) as session_count, - COALESCE(SUM(token_count), 0) as token_count - FROM chat_sessions - WHERE created_at >= ? AND created_at <= ? - GROUP BY provider, DATE(created_at) - ORDER BY provider, date - "#, - ) - .bind(start_date.to_rfc3339()) - .bind(end_date.to_rfc3339()) - .fetch_all(&self.pool) - .await - .context("Failed to get provider usage trends")?; - - let mut provider_data: HashMap> = HashMap::new(); - for row in trend_rows { - let provider: String = row.try_get("provider")?; - let date_str: String = row.try_get("date")?; - let session_count: i64 = row.try_get("session_count")?; - let token_count: i64 = row.try_get("token_count")?; - - let date = - DateTime::parse_from_str(&format!("{date_str}T00:00:00Z"), "%Y-%m-%dT%H:%M:%SZ") - .context("Failed to parse date")? - .with_timezone(&Utc); - - provider_data.entry(provider).or_default().push(DailyPoint { - date, - session_count: session_count as u32, - token_count: token_count as u64, - }); - } - - let mut trends = Vec::new(); - for (provider, data) in provider_data { - let total_sessions: u32 = data.iter().map(|d| d.session_count).sum(); - let total_tokens: u64 = data.iter().map(|d| d.token_count).sum(); - - trends.push(ProviderTrend { - provider, - total_sessions, - total_tokens, - daily_data: data, - }); - } - - Ok(trends) - } - - pub async fn get_session_length_distribution(&self) -> AnyhowResult { - let dist_rows = sqlx::query( - r#" - SELECT - CASE - WHEN message_count <= 5 THEN 'short' - WHEN message_count <= 20 THEN 'medium' - WHEN message_count <= 50 THEN 'long' - ELSE 'very_long' - END as length_category, - COUNT(*) as count - FROM chat_sessions - GROUP BY length_category - "#, - ) - .fetch_all(&self.pool) - .await - .context("Failed to get session length distribution")?; - - let mut short = 0; - let mut medium = 0; - let mut long = 0; - let mut very_long = 0; - - for row in dist_rows { - let category: String = row.try_get("length_category")?; - let count: i64 = row.try_get("count")?; - - match category.as_str() { - "short" => short = count as u32, - "medium" => medium = count as u32, - "long" => long = count as u32, - "very_long" => very_long = count as u32, - _ => {} - } - } - - let total = short + medium + long + very_long; - let total_f64 = total as f64; - - Ok(SessionLengthDistribution { - short_sessions: short, - medium_sessions: medium, - long_sessions: long, - very_long_sessions: very_long, - short_percentage: if total > 0 { - short as f64 / total_f64 * 100.0 - } else { - 0.0 - }, - medium_percentage: if total > 0 { - medium as f64 / total_f64 * 100.0 - } else { - 0.0 - }, - long_percentage: if total > 0 { - long as f64 / total_f64 * 100.0 - } else { - 0.0 - }, - very_long_percentage: if total > 0 { - very_long as f64 / total_f64 * 100.0 - } else { - 0.0 - }, - }) - } - - pub async fn get_hourly_activity(&self, days: u32) -> AnyhowResult> { - let end_date = Utc::now(); - let start_date = end_date - Duration::days(days as i64); - - let hourly_rows = sqlx::query( - r#" - SELECT strftime('%H', created_at) as hour, - COUNT(*) as session_count, - COALESCE(SUM(token_count), 0) as token_count - FROM chat_sessions - WHERE created_at >= ? AND created_at <= ? - GROUP BY hour - ORDER BY hour - "#, - ) - .bind(start_date.to_rfc3339()) - .bind(end_date.to_rfc3339()) - .fetch_all(&self.pool) - .await - .context("Failed to get hourly activity")?; - - let mut hourly_activity = Vec::new(); - for row in hourly_rows { - let hour_str: String = row.try_get("hour")?; - let session_count: i64 = row.try_get("session_count")?; - let token_count: i64 = row.try_get("token_count")?; - - if let Ok(hour) = hour_str.parse::() { - hourly_activity.push(HourlyActivity { - hour, - session_count: session_count as u32, - token_count: token_count as u64, - }); - } - } - - Ok(hourly_activity) - } - - pub async fn generate_insights(&self, days: u32) -> AnyhowResult> { - let end_date = Utc::now(); - let start_date = end_date - Duration::days(days as i64); - - // Get overall stats - let total_sessions: i64 = sqlx::query_scalar( - "SELECT COUNT(*) FROM chat_sessions WHERE created_at >= ? AND created_at <= ?", - ) - .bind(start_date.to_rfc3339()) - .bind(end_date.to_rfc3339()) - .fetch_one(&self.pool) - .await - .context("Failed to get total sessions")?; - - let total_tokens: i64 = sqlx::query_scalar( - "SELECT COALESCE(SUM(token_count), 0) FROM chat_sessions WHERE created_at >= ? AND created_at <= ?" - ) - .bind(start_date.to_rfc3339()) - .bind(end_date.to_rfc3339()) - .fetch_one(&self.pool) - .await - .context("Failed to get total tokens")?; - - // Get most used provider - let most_used_provider: Option = sqlx::query_scalar( - r#" - SELECT provider FROM chat_sessions - WHERE created_at >= ? AND created_at <= ? - GROUP BY provider - ORDER BY COUNT(*) DESC - LIMIT 1 - "#, - ) - .bind(start_date.to_rfc3339()) - .bind(end_date.to_rfc3339()) - .fetch_optional(&self.pool) - .await - .context("Failed to get most used provider")?; - - let mut insights = Vec::new(); - - if total_sessions > 0 { - insights.push(format!( - "📊 Usage Summary: {total_sessions} sessions with {total_tokens} tokens over the last {days} days" - )); - - if let Some(provider) = most_used_provider { - insights.push(format!("🎯 Most used provider: {provider}")); - } - - let avg_tokens = total_tokens as f64 / total_sessions as f64; - insights.push(format!("📈 Average tokens per session: {avg_tokens:.1}")); - - // Get session length distribution - let distribution = self.get_session_length_distribution().await?; - if distribution.short_sessions > distribution.medium_sessions { - insights.push("💡 Most sessions are short (≤5 messages) - consider longer conversations for better context".to_string()); - } else if distribution.very_long_sessions > 0 { - insights.push( - "🔥 You have some very long sessions (>50 messages) - great for complex tasks!" - .to_string(), - ); - } - } else { - insights.push( - "📝 No activity in the selected time period. Start chatting to see insights!" - .to_string(), - ); - } - - Ok(insights) - } - - pub async fn get_total_stats(&self) -> AnyhowResult<(u32, u32, u64)> { - let total_sessions: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM chat_sessions") - .fetch_one(&self.pool) - .await - .context("Failed to get total sessions")?; - - let total_messages: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM messages") - .fetch_one(&self.pool) - .await - .context("Failed to get total messages")?; - - let total_tokens: i64 = - sqlx::query_scalar("SELECT COALESCE(SUM(token_count), 0) FROM chat_sessions") - .fetch_one(&self.pool) - .await - .context("Failed to get total tokens")?; - - Ok(( - total_sessions as u32, - total_messages as u32, - total_tokens as u64, - )) - } - pub async fn save_analytics(&self, analytics: &Analytics) -> AnyhowResult { let generated_at = analytics.generated_at.to_rfc3339(); @@ -566,50 +199,3 @@ impl AnalyticsRepository { } } } - -// Data structures (same as original) -#[derive(Debug)] -pub struct DailyUsageStats { - pub date: DateTime, - pub total_sessions: u32, - pub total_messages: u32, - pub total_tokens: u64, - pub provider_usage: HashMap, - pub hourly_distribution: Vec, // 24 elements for hours 0-23 - pub avg_session_length: f64, - pub avg_tokens_per_session: f64, -} - -#[derive(Debug)] -pub struct ProviderTrend { - pub provider: String, - pub total_sessions: u32, - pub total_tokens: u64, - pub daily_data: Vec, -} - -#[derive(Debug)] -pub struct DailyPoint { - pub date: DateTime, - pub session_count: u32, - pub token_count: u64, -} - -#[derive(Debug)] -pub struct SessionLengthDistribution { - pub short_sessions: u32, // <= 5 messages - pub medium_sessions: u32, // 6-20 messages - pub long_sessions: u32, // 21-50 messages - pub very_long_sessions: u32, // > 50 messages - pub short_percentage: f64, - pub medium_percentage: f64, - pub long_percentage: f64, - pub very_long_percentage: f64, -} - -#[derive(Debug)] -pub struct HourlyActivity { - pub hour: u8, // 0-23 - pub session_count: u32, - pub token_count: u64, -} diff --git a/src/database/mod.rs b/src/database/mod.rs index 4f13616..1a220a8 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -10,10 +10,7 @@ pub mod schema; pub mod tool_operation_repo; // Main repositories (now using SQLx) -pub use analytics_repo::{ - AnalyticsRepository, DailyPoint, DailyUsageStats, HourlyActivity, ProviderTrend, - SessionLengthDistribution, -}; +pub use analytics_repo::AnalyticsRepository; pub use analytics_request_repo::AnalyticsRequestRepository; pub use chat_session_repo::ChatSessionRepository; pub use connection::DatabaseManager; diff --git a/src/tools/analytics.rs b/src/tools/analytics.rs deleted file mode 100644 index c4d9714..0000000 --- a/src/tools/analytics.rs +++ /dev/null @@ -1,258 +0,0 @@ -use crate::models::message::{Message, ToolUse}; -use crate::tools::types::ToolType; -use std::collections::HashMap; - -/// Statistics about tool usage -#[derive(Debug, Clone)] -pub struct ToolUsageStats { - /// Total number of tool uses - pub total_tools: usize, - /// Count by tool type - pub by_type: HashMap, - /// Count of file operations - pub file_operations: usize, - /// Count of network operations - pub network_operations: usize, - /// Count of code operations - pub code_operations: usize, -} - -impl ToolUsageStats { - /// Create new empty stats - pub fn new() -> Self { - Self { - total_tools: 0, - by_type: HashMap::new(), - file_operations: 0, - network_operations: 0, - code_operations: 0, - } - } - - /// Add a tool use to the statistics - pub fn add_tool(&mut self, tool_use: &ToolUse) { - self.total_tools += 1; - - let tool_type = ToolType::from_name(&tool_use.name); - *self.by_type.entry(tool_type.to_string()).or_insert(0) += 1; - - if tool_type.is_file_operation() { - self.file_operations += 1; - } - if tool_type.is_network_operation() { - self.network_operations += 1; - } - if tool_type.is_code_operation() { - self.code_operations += 1; - } - } - - /// Get the most frequently used tool type - pub fn most_used_tool(&self) -> Option<(&String, &usize)> { - self.by_type.iter().max_by_key(|(_, count)| *count) - } - - /// Get percentage of file operations - pub fn file_operations_percentage(&self) -> f64 { - if self.total_tools == 0 { - 0.0 - } else { - (self.file_operations as f64 / self.total_tools as f64) * 100.0 - } - } -} - -impl Default for ToolUsageStats { - fn default() -> Self { - Self::new() - } -} - -/// Analytics tool usage from messages -pub fn analytics_tool_usage(messages: &[Message]) -> ToolUsageStats { - let mut stats = ToolUsageStats::new(); - - for message in messages { - if let Some(tool_uses) = &message.tool_uses { - for tool_use in tool_uses { - stats.add_tool(tool_use); - } - } - } - - stats -} - -/// Find messages with specific tool type -pub fn find_messages_with_tool(messages: &[Message], tool_type: ToolType) -> Vec<&Message> { - messages - .iter() - .filter(|msg| { - msg.tool_uses - .as_ref() - .map(|uses| { - uses.iter().any(|use_| { - ToolType::from_name(&use_.name).canonical_name() - == tool_type.canonical_name() - }) - }) - .unwrap_or(false) - }) - .collect() -} - -/// Extract all file paths from tool uses in messages -pub fn extract_file_paths(messages: &[Message]) -> Vec { - let mut paths = Vec::new(); - - for message in messages { - if let Some(tool_uses) = &message.tool_uses { - for tool_use in tool_uses { - let tool_type = ToolType::from_name(&tool_use.name); - if tool_type.is_file_operation() { - if let Some(path) = tool_use.input.get("file_path").and_then(|v| v.as_str()) { - paths.push(path.to_string()); - } - } - } - } - } - - paths -} - -/// Count tool uses by tool name -pub fn count_by_tool_name(messages: &[Message]) -> HashMap { - let mut counts = HashMap::new(); - - for message in messages { - if let Some(tool_uses) = &message.tool_uses { - for tool_use in tool_uses { - *counts.entry(tool_use.name.clone()).or_insert(0) += 1; - } - } - } - - counts -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::models::message::MessageRole; - use chrono::Utc; - use serde_json::json; - use uuid::Uuid; - - fn create_test_message_with_tools(tools: Vec) -> Message { - Message::new( - Uuid::new_v4(), - MessageRole::Assistant, - "test".to_string(), - Utc::now(), - 1, - ) - .with_tool_uses(tools) - } - - fn create_tool_use(name: &str, _vendor_type: &str) -> ToolUse { - ToolUse { - id: Uuid::new_v4().to_string(), - name: name.to_string(), - input: json!({}), - raw: json!({}), - } - } - - #[test] - fn test_tool_usage_stats() { - let mut stats = ToolUsageStats::new(); - - let bash_tool = create_tool_use("Bash", "tool_use"); - let read_tool = create_tool_use("Read", "tool_use"); - - stats.add_tool(&bash_tool); - stats.add_tool(&read_tool); - stats.add_tool(&read_tool); - - assert_eq!(stats.total_tools, 3); - assert_eq!(*stats.by_type.get("Bash").unwrap(), 1); - assert_eq!(*stats.by_type.get("Read").unwrap(), 2); - assert_eq!(stats.file_operations, 2); // Read is a file operation - } - - #[test] - fn test_analytics_tool_usage() { - let tools = vec![ - create_tool_use("Bash", "tool_use"), - create_tool_use("Read", "tool_use"), - ]; - let messages = vec![create_test_message_with_tools(tools)]; - - let stats = analytics_tool_usage(&messages); - - assert_eq!(stats.total_tools, 2); - assert!(stats.by_type.contains_key("Bash")); - assert!(stats.by_type.contains_key("Read")); - } - - #[test] - fn test_find_messages_with_tool() { - let bash_tools = vec![create_tool_use("Bash", "tool_use")]; - let read_tools = vec![create_tool_use("Read", "tool_use")]; - - let messages = vec![ - create_test_message_with_tools(bash_tools), - create_test_message_with_tools(read_tools), - ]; - - let bash_messages = find_messages_with_tool(&messages, ToolType::Bash); - assert_eq!(bash_messages.len(), 1); - - let read_messages = find_messages_with_tool(&messages, ToolType::Read); - assert_eq!(read_messages.len(), 1); - } - - #[test] - fn test_extract_file_paths() { - let tools = vec![ToolUse { - id: "test".to_string(), - name: "Read".to_string(), - input: json!({"file_path": "/path/to/file.rs"}), - raw: json!({}), - }]; - - let messages = vec![create_test_message_with_tools(tools)]; - - let paths = extract_file_paths(&messages); - assert_eq!(paths.len(), 1); - assert_eq!(paths[0], "/path/to/file.rs"); - } - - #[test] - fn test_count_by_vendor() { - let tools = vec![ - create_tool_use("Bash", "tool_use"), - create_tool_use("Read", "tool-call"), - ]; - - let messages = vec![create_test_message_with_tools(tools)]; - - let counts = count_by_tool_name(&messages); - assert_eq!(*counts.get("Bash").unwrap(), 1); - assert_eq!(*counts.get("Read").unwrap(), 1); - } - - #[test] - fn test_most_used_tool() { - let mut stats = ToolUsageStats::new(); - - stats.add_tool(&create_tool_use("Bash", "tool_use")); - stats.add_tool(&create_tool_use("Read", "tool_use")); - stats.add_tool(&create_tool_use("Read", "tool_use")); - - let (tool, count) = stats.most_used_tool().unwrap(); - assert_eq!(tool, "Read"); - assert_eq!(*count, 2); - } -} diff --git a/src/tools/mod.rs b/src/tools/mod.rs index e198ca1..5c1a63b 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,4 +1,3 @@ -pub mod analytics; pub mod parsers; pub mod types;