diff --git a/crates/q_cli/src/cli/chat/command.rs b/crates/q_cli/src/cli/chat/command.rs
index e63fbc244a..47eaf144e2 100644
--- a/crates/q_cli/src/cli/chat/command.rs
+++ b/crates/q_cli/src/cli/chat/command.rs
@@ -42,6 +42,7 @@ pub enum Command {
     Tools {
         subcommand: Option<ToolsSubcommand>,
     },
+    Usage,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -696,6 +697,7 @@ impl Command {
                     },
                 }
             },
+            "usage" => Self::Usage,
             unknown_command => {
                 // If the command starts with a slash but isn't recognized,
                 // return an error instead of treating it as a prompt
diff --git a/crates/q_cli/src/cli/chat/conversation_state.rs b/crates/q_cli/src/cli/chat/conversation_state.rs
index 6ce38a9bab..766f69bfee 100644
--- a/crates/q_cli/src/cli/chat/conversation_state.rs
+++ b/crates/q_cli/src/cli/chat/conversation_state.rs
@@ -190,6 +190,11 @@ impl ConversationState {
         self.conversation_id.as_ref()
     }
 
+    /// Returns the conversation history.
+    pub fn get_chat_history(&self) -> Vec<ChatMessage> {
+        self.history.iter().cloned().collect()
+    }
+
     /// Returns the message id associated with the last assistant message, if present.
     ///
     /// This is equivalent to `utterance_id` in the Q API.
diff --git a/crates/q_cli/src/cli/chat/mod.rs b/crates/q_cli/src/cli/chat/mod.rs
index 56880e7198..cb7889004c 100644
--- a/crates/q_cli/src/cli/chat/mod.rs
+++ b/crates/q_cli/src/cli/chat/mod.rs
@@ -73,6 +73,7 @@ use hooks::{
     HookTrigger,
 };
 use summarization_state::{
+    CONTEXT_WINDOW_SIZE,
     SummarizationState,
     TokenWarningLevel,
 };
@@ -205,6 +206,7 @@ const HELP_TEXT: &str = color_print::cstr! {"
   rm          Remove file(s) from context [--global]
   clear       Clear all files from current context [--global]
   hooks       View and manage context hooks
+/usage        Show current session's context window usage
 
 Tips:
 !{command}    Quickly execute a command in your current session
@@ -1895,6 +1897,157 @@ where
                     // during PromptUser.
                     execute!(self.output, style::Print("\n\n"),)?;
 
+                    ChatState::PromptUser {
+                        tool_uses: Some(tool_uses),
+                        pending_tool_index,
+                        skip_printing_tools: true,
+                    }
+                },
+                Command::Usage => {
+                    let context_messages = self.conversation_state.context_messages(None).await;
+                    let chat_history = self.conversation_state.get_chat_history();
+                    let assistant_messages = chat_history
+                        .iter()
+                        .filter_map(|message| {
+                            if let fig_api_client::model::ChatMessage::AssistantResponseMessage(msg) = message {
+                                Some(msg)
+                            } else {
+                                None
+                            }
+                        })
+                        .collect::<Vec<_>>();
+
+                    let user_messages = chat_history
+                        .iter()
+                        .filter_map(|message| {
+                            if let fig_api_client::model::ChatMessage::UserInputMessage(msg) = message {
+                                Some(msg)
+                            } else {
+                                None
+                            }
+                        })
+                        .collect::<Vec<_>>();
+
+                    let context_token_count = context_messages
+                        .iter()
+                        .map(|msg| TokenCounter::count_tokens(&msg.0.content))
+                        .sum::<usize>();
+
+                    let assistant_token_count = assistant_messages
+                        .iter()
+                        .map(|msg| TokenCounter::count_tokens(&msg.content))
+                        .sum::<usize>();
+
+                    let user_token_count = user_messages
+                        .iter()
+                        .map(|msg| TokenCounter::count_tokens(&msg.content))
+                        .sum::<usize>();
+
+                    let total_token_used: usize = context_token_count + assistant_token_count + user_token_count;
+
+                    let window_width = self.terminal_width();
+                    let progress_bar_width = std::cmp::min(window_width, 80); // set a max width for the progress bar for better aesthetics
+
+                    let context_width =
+                        ((context_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
+                    let assistant_width =
+                        ((assistant_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
+                    let user_width =
+                        ((user_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
+
+                    let left_over_width = progress_bar_width
+                        - std::cmp::min(context_width + assistant_width + user_width, progress_bar_width);
+
+                    queue!(
+                        self.output,
+                        style::Print(format!(
+                            "\nCurrent context window ({} of {}k tokens used)\n",
+                            total_token_used,
+                            CONTEXT_WINDOW_SIZE / 1000
+                        )),
+                        style::SetForegroundColor(Color::DarkCyan),
+                        // add a nice visual to mimic "tiny" progress, so the overall progress bar doesn't look too empty
+                        style::Print("|".repeat(if context_width == 0 && context_token_count > 0 {
+                            1
+                        } else {
+                            0
+                        })),
+                        style::Print("█".repeat(context_width)),
+                        style::SetForegroundColor(Color::Blue),
+                        style::Print("|".repeat(if assistant_width == 0 && assistant_token_count > 0 {
+                            1
+                        } else {
+                            0
+                        })),
+                        style::Print("█".repeat(assistant_width)),
+                        style::SetForegroundColor(Color::Magenta),
+                        style::Print("|".repeat(if user_width == 0 && user_token_count > 0 { 1 } else { 0 })),
+                        style::Print("█".repeat(user_width)),
+                        style::SetForegroundColor(Color::DarkGrey),
+                        style::Print("█".repeat(left_over_width)),
+                        style::Print(" "),
+                        style::SetForegroundColor(Color::Reset),
+                        style::Print(format!(
+                            "{:.2}%",
+                            (total_token_used as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
+                        )),
+                    )?;
+
+                    queue!(self.output, style::Print("\n\n"))?;
+                    self.output.flush()?;
+
+                    queue!(
+                        self.output,
+                        style::SetForegroundColor(Color::DarkCyan),
+                        style::Print("█ Context files: "),
+                        style::SetForegroundColor(Color::Reset),
+                        style::Print(format!(
+                            "~{} tokens ({:.2}%)\n",
+                            context_token_count,
+                            (context_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
+                        )),
+                        style::SetForegroundColor(Color::Blue),
+                        style::Print("█ Q responses: "),
+                        style::SetForegroundColor(Color::Reset),
+                        style::Print(format!(
+                            " ~{} tokens ({:.2}%)\n",
+                            assistant_token_count,
+                            (assistant_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
+                        )),
+                        style::SetForegroundColor(Color::Magenta),
+                        style::Print("█ Your prompts: "),
+                        style::SetForegroundColor(Color::Reset),
+                        style::Print(format!(
+                            " ~{} tokens ({:.2}%)\n\n",
+                            user_token_count,
+                            (user_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
+                        )),
+                    )?;
+
+                    queue!(
+                        self.output,
+                        style::SetAttribute(Attribute::Bold),
+                        style::Print("\n💡 Pro Tips:\n"),
+                        style::SetAttribute(Attribute::Reset),
+                        style::SetForegroundColor(Color::DarkGrey),
+                        style::Print("Run "),
+                        style::SetForegroundColor(Color::DarkGreen),
+                        style::Print("/compact"),
+                        style::SetForegroundColor(Color::DarkGrey),
+                        style::Print(" to replace the conversation history with its summary\n"),
+                        style::Print("Run "),
+                        style::SetForegroundColor(Color::DarkGreen),
+                        style::Print("/clear"),
+                        style::SetForegroundColor(Color::DarkGrey),
+                        style::Print(" to erase the entire chat history\n"),
+                        style::Print("Run "),
+                        style::SetForegroundColor(Color::DarkGreen),
+                        style::Print("/context show"),
+                        style::SetForegroundColor(Color::DarkGrey),
+                        style::Print(" to see tokens per context file\n\n"),
+                        style::SetForegroundColor(Color::Reset),
+                    )?;
+
+                    ChatState::PromptUser {
+                        tool_uses: Some(tool_uses),
+                        pending_tool_index,
                         skip_printing_tools: true,
                     }
                 },
diff --git a/crates/q_cli/src/cli/chat/prompt.rs b/crates/q_cli/src/cli/chat/prompt.rs
index 710c1e5c60..2a0bd05966 100644
--- a/crates/q_cli/src/cli/chat/prompt.rs
+++ b/crates/q_cli/src/cli/chat/prompt.rs
@@ -67,6 +67,7 @@ pub const COMMANDS: &[&str] = &[
     "/compact",
     "/compact help",
     "/compact --summary",
+    "/usage",
 ];
 
 pub fn generate_prompt(current_profile: Option<&str>, warning: bool) -> String {
diff --git a/crates/q_cli/src/cli/chat/summarization_state.rs b/crates/q_cli/src/cli/chat/summarization_state.rs
index 04ad47283f..c836a31ed0 100644
--- a/crates/q_cli/src/cli/chat/summarization_state.rs
+++ b/crates/q_cli/src/cli/chat/summarization_state.rs
@@ -2,17 +2,20 @@ use std::collections::VecDeque;
 
 use fig_api_client::model::ChatMessage;
 
+use crate::util::token_counter::TokenCounter;
+
 /// Character count warning levels for conversation size
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum TokenWarningLevel {
     /// No warning, conversation is within normal limits
     None,
-    /// Critical level - at single warning threshold (500K characters)
+    /// Critical level - at single warning threshold (600K characters)
     Critical,
 }
 
 /// Constants for character-based warning threshold
-pub const MAX_CHARS: usize = 500000; // Character-based warning threshold
+pub const CONTEXT_WINDOW_SIZE: usize = 200_000; // tokens
+pub const MAX_CHARS: usize = TokenCounter::token_to_chars(CONTEXT_WINDOW_SIZE); // Character-based warning threshold
 
 /// State for tracking summarization process
 #[derive(Debug, Clone)]
diff --git a/crates/q_cli/src/util/token_counter.rs b/crates/q_cli/src/util/token_counter.rs
index 3dd797688d..fb0c188be6 100644
--- a/crates/q_cli/src/util/token_counter.rs
+++ b/crates/q_cli/src/util/token_counter.rs
@@ -1,12 +1,18 @@
 pub struct TokenCounter;
 
 impl TokenCounter {
+    pub const TOKEN_TO_CHAR_RATIO: usize = 3;
+
     /// Estimates the number of tokens in the input content.
-    /// Currently uses a simple heuristic: content length / 3
+    /// Currently uses a simple heuristic: content length / TOKEN_TO_CHAR_RATIO
     ///
     /// Rounds up to the nearest multiple of 10 to avoid giving users a false sense of precision.
     pub fn count_tokens(content: &str) -> usize {
-        (content.len() / 3 + 5) / 10 * 10
+        (content.len() / Self::TOKEN_TO_CHAR_RATIO + 5) / 10 * 10
+    }
+
+    pub const fn token_to_chars(token: usize) -> usize {
+        token * Self::TOKEN_TO_CHAR_RATIO
     }
 }