2 changes: 2 additions & 0 deletions crates/q_cli/src/cli/chat/command.rs
@@ -42,6 +42,7 @@ pub enum Command {
Tools {
subcommand: Option<ToolsSubcommand>,
},
Usage,
}

#[derive(Debug, Clone, PartialEq, Eq)]
@@ -696,6 +697,7 @@ impl Command {
},
}
},
"usage" => Self::Usage,
unknown_command => {
// If the command starts with a slash but isn't recognized,
// return an error instead of treating it as a prompt
5 changes: 5 additions & 0 deletions crates/q_cli/src/cli/chat/conversation_state.rs
@@ -190,6 +190,11 @@ impl ConversationState {
self.conversation_id.as_ref()
}

/// Returns the conversation history.
pub fn get_chat_history(&self) -> Vec<ChatMessage> {
self.history.iter().cloned().collect()
}

/// Returns the message id associated with the last assistant message, if present.
///
/// This is equivalent to `utterance_id` in the Q API.
153 changes: 153 additions & 0 deletions crates/q_cli/src/cli/chat/mod.rs
@@ -73,6 +73,7 @@ use hooks::{
HookTrigger,
};
use summarization_state::{
CONTEXT_WINDOW_SIZE,
SummarizationState,
TokenWarningLevel,
};
@@ -205,6 +206,7 @@ const HELP_TEXT: &str = color_print::cstr! {"
<em>rm</em> <black!>Remove file(s) from context [--global]</black!>
<em>clear</em> <black!>Clear all files from current context [--global]</black!>
<em>hooks</em> <black!>View and manage context hooks</black!>
<em>/usage</em> <black!>Show current session's context window usage</black!>

<cyan,em>Tips:</cyan,em>
<em>!{command}</em> <black!>Quickly execute a command in your current session</black!>
@@ -1895,6 +1897,157 @@ where
// during PromptUser.
execute!(self.output, style::Print("\n\n"),)?;

ChatState::PromptUser {
tool_uses: Some(tool_uses),
pending_tool_index,
skip_printing_tools: true,
}
},
Command::Usage => {
let context_messages = self.conversation_state.context_messages(None).await;
let chat_history = self.conversation_state.get_chat_history();
let assistant_messages = chat_history
.iter()
.filter_map(|message| {
if let fig_api_client::model::ChatMessage::AssistantResponseMessage(msg) = message {
Some(msg)
} else {
None
}
})
.collect::<Vec<_>>();

let user_messages = chat_history
.iter()
.filter_map(|message| {
if let fig_api_client::model::ChatMessage::UserInputMessage(msg) = message {
Some(msg)
} else {
None
}
})
.collect::<Vec<_>>();

let context_token_count = context_messages
.iter()
.map(|msg| TokenCounter::count_tokens(&msg.0.content))
.sum::<usize>();

let assistant_token_count = assistant_messages
.iter()
.map(|msg| TokenCounter::count_tokens(&msg.content))
.sum::<usize>();

let user_token_count = user_messages
.iter()
.map(|msg| TokenCounter::count_tokens(&msg.content))
.sum::<usize>();

let total_token_used: usize = context_token_count + assistant_token_count + user_token_count;

let window_width = self.terminal_width();
let progress_bar_width = std::cmp::min(window_width, 80); // cap the progress bar width at 80 columns for better aesthetics

let context_width =
((context_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
let assistant_width =
((assistant_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
let user_width =
((user_token_count as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;

let left_over_width = progress_bar_width
- std::cmp::min(context_width + assistant_width + user_width, progress_bar_width);

queue!(
self.output,
style::Print(format!(
"\nCurrent context window ({} of {}k tokens used)\n",
total_token_used,
CONTEXT_WINDOW_SIZE / 1000
)),
style::SetForegroundColor(Color::DarkCyan),
// render a thin "|" marker when a non-zero bucket rounds down to zero width, so the overall progress bar doesn't look too empty
style::Print("|".repeat(if context_width == 0 && context_token_count > 0 {
1
} else {
0
})),
style::Print("█".repeat(context_width)),
style::SetForegroundColor(Color::Blue),
style::Print("|".repeat(if assistant_width == 0 && assistant_token_count > 0 {
1
} else {
0
})),
style::Print("█".repeat(assistant_width)),
style::SetForegroundColor(Color::Magenta),
style::Print("|".repeat(if user_width == 0 && user_token_count > 0 { 1 } else { 0 })),
style::Print("█".repeat(user_width)),
style::SetForegroundColor(Color::DarkGrey),
style::Print("█".repeat(left_over_width)),
style::Print(" "),
style::SetForegroundColor(Color::Reset),
style::Print(format!(
"{:.2}%",
(total_token_used as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
)),
)?;

queue!(self.output, style::Print("\n\n"))?;
self.output.flush()?;

queue!(
self.output,
style::SetForegroundColor(Color::DarkCyan),
style::Print("█ Context files: "),
style::SetForegroundColor(Color::Reset),
style::Print(format!(
"~{} tokens ({:.2}%)\n",
context_token_count,
(context_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
)),
style::SetForegroundColor(Color::Blue),
style::Print("█ Q responses: "),
style::SetForegroundColor(Color::Reset),
style::Print(format!(
" ~{} tokens ({:.2}%)\n",
assistant_token_count,
(assistant_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
)),
style::SetForegroundColor(Color::Magenta),
style::Print("█ Your prompts: "),
style::SetForegroundColor(Color::Reset),
style::Print(format!(
" ~{} tokens ({:.2}%)\n\n",
user_token_count,
(user_token_count as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
)),
)?;

queue!(
self.output,
style::SetAttribute(Attribute::Bold),
style::Print("\n💡 Pro Tips:\n"),
style::SetAttribute(Attribute::Reset),
style::SetForegroundColor(Color::DarkGrey),
style::Print("Run "),
style::SetForegroundColor(Color::DarkGreen),
style::Print("/compact"),
style::SetForegroundColor(Color::DarkGrey),
style::Print(" to replace the conversation history with its summary\n"),
style::Print("Run "),
style::SetForegroundColor(Color::DarkGreen),
style::Print("/clear"),
style::SetForegroundColor(Color::DarkGrey),
style::Print(" to erase the entire chat history\n"),
style::Print("Run "),
style::SetForegroundColor(Color::DarkGreen),
style::Print("/context show"),
style::SetForegroundColor(Color::DarkGrey),
style::Print(" to see tokens per context file\n\n"),
style::SetForegroundColor(Color::Reset),
)?;

ChatState::PromptUser {
tool_uses: Some(tool_uses),
pending_tool_index,
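
A minimal standalone sketch of the proportional-width arithmetic used in the /usage handler above. The CONTEXT_WINDOW_SIZE value matches the constant added in summarization_state.rs; the helper and variable names are illustrative, not taken from the crate:

// Each bucket gets a bar width proportional to its share of the context
// window; rounding slack is rendered as unused (grey) space.
const CONTEXT_WINDOW_SIZE: usize = 200_000; // tokens

fn bar_widths(context: usize, assistant: usize, user: usize, bar: usize) -> (usize, usize, usize, usize) {
    let scale = |tokens: usize| (tokens as f64 / CONTEXT_WINDOW_SIZE as f64 * bar as f64) as usize;
    let (c, a, u) = (scale(context), scale(assistant), scale(user));
    // Clamp the sum so the subtraction cannot underflow if usage ever exceeds the window.
    let free = bar - std::cmp::min(c + a + u, bar);
    (c, a, u, free)
}

fn main() {
    // e.g. 30k context-file tokens, 12k assistant tokens, 6k user tokens on an 80-column bar
    let (c, a, u, free) = bar_widths(30_000, 12_000, 6_000, 80);
    println!("context={c} assistant={a} user={u} free={free}"); // context=12 assistant=4 user=2 free=62
}
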
1 change: 1 addition & 0 deletions crates/q_cli/src/cli/chat/prompt.rs
@@ -67,6 +67,7 @@ pub const COMMANDS: &[&str] = &[
"/compact",
"/compact help",
"/compact --summary",
"/usage",
];

pub fn generate_prompt(current_profile: Option<&str>, warning: bool) -> String {
7 changes: 5 additions & 2 deletions crates/q_cli/src/cli/chat/summarization_state.rs
@@ -2,17 +2,20 @@ use std::collections::VecDeque;

use fig_api_client::model::ChatMessage;

use crate::util::token_counter::TokenCounter;

/// Character count warning levels for conversation size
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenWarningLevel {
/// No warning, conversation is within normal limits
None,
/// Critical level - at single warning threshold (500K characters)
/// Critical level - at single warning threshold (600K characters)
Critical,
}

/// Constants for character-based warning threshold
pub const MAX_CHARS: usize = 500000; // Character-based warning threshold
pub const CONTEXT_WINDOW_SIZE: usize = 200_000; // tokens
pub const MAX_CHARS: usize = TokenCounter::token_to_chars(CONTEXT_WINDOW_SIZE); // Character-based warning threshold

/// State for tracking summarization process
#[derive(Debug, Clone)]
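
The updated threshold above is derived rather than hard-coded. A small illustrative check of the arithmetic, with the constants copied into a standalone example:

// MAX_CHARS now follows from the window size and the chars-per-token ratio:
//   token_to_chars(200_000) = 200_000 * 3 = 600_000 characters
const TOKEN_TO_CHAR_RATIO: usize = 3;
const CONTEXT_WINDOW_SIZE: usize = 200_000;
const MAX_CHARS: usize = CONTEXT_WINDOW_SIZE * TOKEN_TO_CHAR_RATIO;

fn main() {
    assert_eq!(MAX_CHARS, 600_000); // matches the "600K characters" doc comment above
}
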
10 changes: 8 additions & 2 deletions crates/q_cli/src/util/token_counter.rs
@@ -1,12 +1,18 @@
pub struct TokenCounter;

impl TokenCounter {
pub const TOKEN_TO_CHAR_RATIO: usize = 3;

/// Estimates the number of tokens in the input content.
/// Currently uses a simple heuristic: content length / 3
/// Currently uses a simple heuristic: content length / TOKEN_TO_CHAR_RATIO
///
/// Rounds to the nearest multiple of 10 to avoid giving users a false sense of precision.
pub fn count_tokens(content: &str) -> usize {
(content.len() / 3 + 5) / 10 * 10
(content.len() / Self::TOKEN_TO_CHAR_RATIO + 5) / 10 * 10
}

pub const fn token_to_chars(token: usize) -> usize {
token * Self::TOKEN_TO_CHAR_RATIO
}
}

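
To make the rounding behaviour concrete, here is a standalone check of the heuristic as written above (the function body is copied for illustration):

// Divide the byte length by the 3-chars-per-token ratio, then round to the
// nearest multiple of 10 so the estimate doesn't look more precise than it is.
fn count_tokens(content: &str) -> usize {
    (content.len() / 3 + 5) / 10 * 10
}

fn main() {
    assert_eq!(count_tokens(""), 0);               // 0 / 3 = 0   -> 0
    assert_eq!(count_tokens(&"a".repeat(30)), 10); // 30 / 3 = 10 -> 10
    assert_eq!(count_tokens(&"a".repeat(44)), 10); // 44 / 3 = 14 -> 10 (rounds down)
    assert_eq!(count_tokens(&"a".repeat(45)), 20); // 45 / 3 = 15 -> 20 (rounds up)
}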