Skip to content

Commit a0e9698

Browse files
feat: add experimental model for amazon users (#2495)
1 parent a3a1777 commit a0e9698

File tree

10 files changed

+141
-55
lines changed

10 files changed

+141
-55
lines changed

crates/chat-cli/src/auth/builder_id.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,20 @@ impl BuilderIdToken {
303303

304304
/// Load the token from the keychain, refresh the token if it is expired and return it
305305
pub async fn load(database: &Database) -> Result<Option<Self>, AuthError> {
306+
// Can't use #[cfg(test)] without breaking lints, and we don't want to require
307+
// authentication in order to run ChatSession tests. Hence, adding this here with cfg!(test)
308+
if cfg!(test) {
309+
return Ok(Some(Self {
310+
access_token: Secret("test_access_token".to_string()),
311+
expires_at: time::OffsetDateTime::now_utc() + time::Duration::minutes(60),
312+
refresh_token: Some(Secret("test_refresh_token".to_string())),
313+
region: Some(OIDC_BUILDER_ID_REGION.to_string()),
314+
start_url: Some(START_URL.to_string()),
315+
oauth_flow: OAuthFlow::DeviceCode,
316+
scopes: Some(SCOPES.iter().map(|s| (*s).to_owned()).collect()),
317+
}));
318+
}
319+
306320
trace!("loading builder id token from the secret store");
307321
match database.get_secret(Self::SECRET_KEY).await {
308322
Ok(Some(secret)) => {

crates/chat-cli/src/cli/chat/cli/context.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ use crossterm::{
1010
style,
1111
};
1212

13-
use crate::cli::chat::consts::{
14-
AGENT_FORMAT_HOOKS_DOC_URL,
15-
CONTEXT_FILES_MAX_SIZE,
13+
use crate::cli::chat::consts::AGENT_FORMAT_HOOKS_DOC_URL;
14+
use crate::cli::chat::context::{
15+
ContextFilePath,
16+
calc_max_context_files_size,
1617
};
17-
use crate::cli::chat::context::ContextFilePath;
1818
use crate::cli::chat::token_counter::TokenCounter;
1919
use crate::cli::chat::util::drop_matched_context_files;
2020
use crate::cli::chat::{
@@ -222,11 +222,12 @@ impl ContextSubcommand {
222222
execute!(session.stderr, style::Print(format!("{}\n\n", "▔".repeat(3))),)?;
223223
}
224224

225+
let context_files_max_size = calc_max_context_files_size(session.conversation.model.as_deref());
225226
let mut files_as_vec = profile_context_files
226227
.iter()
227228
.map(|(path, content, _)| (path.clone(), content.clone()))
228229
.collect::<Vec<_>>();
229-
let dropped_files = drop_matched_context_files(&mut files_as_vec, CONTEXT_FILES_MAX_SIZE).ok();
230+
let dropped_files = drop_matched_context_files(&mut files_as_vec, context_files_max_size).ok();
230231

231232
execute!(
232233
session.stderr,
@@ -240,7 +241,7 @@ impl ContextSubcommand {
240241
style::SetForegroundColor(Color::DarkYellow),
241242
style::Print(format!(
242243
"Total token count exceeds limit: {}. The following files will be automatically dropped when interacting with Q. Consider removing them. \n\n",
243-
CONTEXT_FILES_MAX_SIZE
244+
context_files_max_size
244245
)),
245246
style::SetForegroundColor(Color::Reset)
246247
)?;

crates/chat-cli/src/cli/chat/cli/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ impl SlashCommand {
134134
Self::Hooks(args) => args.execute(session).await,
135135
Self::Usage(args) => args.execute(os, session).await,
136136
Self::Mcp(args) => args.execute(session).await,
137-
Self::Model(args) => args.execute(session).await,
137+
Self::Model(args) => args.execute(os, session).await,
138138
Self::Subscribe(args) => args.execute(os, session).await,
139139
Self::Persist(subcommand) => subcommand.execute(os, session).await,
140140
// Self::Root(subcommand) => {

crates/chat-cli/src/cli/chat/cli/model.rs

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crossterm::{
99
};
1010
use dialoguer::Select;
1111

12+
use crate::auth::AuthError;
1213
use crate::auth::builder_id::{
1314
BuilderIdToken,
1415
TokenType,
@@ -21,18 +22,37 @@ use crate::cli::chat::{
2122
use crate::os::Os;
2223

2324
pub struct ModelOption {
25+
/// Display name
2426
pub name: &'static str,
27+
/// Actual model id to send in the API
2528
pub model_id: &'static str,
29+
/// Size of the model's context window, in tokens
30+
pub context_window_tokens: usize,
2631
}
2732

28-
pub const MODEL_OPTIONS: [ModelOption; 2] = [
33+
const MODEL_OPTIONS: [ModelOption; 2] = [
2934
ModelOption {
3035
name: "claude-4-sonnet",
3136
model_id: "CLAUDE_SONNET_4_20250514_V1_0",
37+
context_window_tokens: 200_000,
3238
},
3339
ModelOption {
3440
name: "claude-3.7-sonnet",
3541
model_id: "CLAUDE_3_7_SONNET_20250219_V1_0",
42+
context_window_tokens: 200_000,
43+
},
44+
];
45+
46+
const OPENAI_MODEL_OPTIONS: [ModelOption; 2] = [
47+
ModelOption {
48+
name: "experimental-gpt-oss-120b",
49+
model_id: "OPENAI_GPT_OSS_120B_1_0",
50+
context_window_tokens: 128_000,
51+
},
52+
ModelOption {
53+
name: "experimental-gpt-oss-20b",
54+
model_id: "OPENAI_GPT_OSS_20B_1_0",
55+
context_window_tokens: 128_000,
3656
},
3757
];
3858

@@ -41,17 +61,19 @@ pub const MODEL_OPTIONS: [ModelOption; 2] = [
4161
pub struct ModelArgs;
4262

4363
impl ModelArgs {
44-
pub async fn execute(self, session: &mut ChatSession) -> Result<ChatState, ChatError> {
45-
Ok(select_model(session)?.unwrap_or(ChatState::PromptUser {
64+
pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {
65+
Ok(select_model(os, session).await?.unwrap_or(ChatState::PromptUser {
4666
skip_printing_tools: false,
4767
}))
4868
}
4969
}
5070

51-
pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, ChatError> {
71+
pub async fn select_model(os: &Os, session: &mut ChatSession) -> Result<Option<ChatState>, ChatError> {
5272
queue!(session.stderr, style::Print("\n"))?;
5373
let active_model_id = session.conversation.model.as_deref();
54-
let labels: Vec<String> = MODEL_OPTIONS
74+
let model_options = get_model_options(os).await?;
75+
76+
let labels: Vec<String> = model_options
5577
.iter()
5678
.map(|opt| {
5779
if (opt.model_id.is_empty() && active_model_id.is_none()) || Some(opt.model_id) == active_model_id {
@@ -83,7 +105,7 @@ pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, Chat
83105
queue!(session.stderr, style::ResetColor)?;
84106

85107
if let Some(index) = selection {
86-
let selected = &MODEL_OPTIONS[index];
108+
let selected = &model_options[index];
87109
let model_id_str = selected.model_id.to_string();
88110
session.conversation.model = Some(model_id_str);
89111

@@ -104,6 +126,8 @@ pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, Chat
104126
}))
105127
}
106128

129+
/// Returns a default model id to use if none has been otherwise provided.
130+
///
107131
/// Returns Claude 3.7 for: Amazon IDC users, FRA region users
108132
/// Returns Claude 4.0 for: Builder ID users, other regions
109133
pub async fn default_model_id(os: &Os) -> &'static str {
@@ -124,3 +148,35 @@ pub async fn default_model_id(os: &Os) -> &'static str {
124148
// Default to 4.0
125149
"CLAUDE_SONNET_4_20250514_V1_0"
126150
}
151+
152+
/// Returns the available models for use.
153+
pub async fn get_model_options(os: &Os) -> Result<Vec<ModelOption>, ChatError> {
154+
let is_amzn_user = BuilderIdToken::load(&os.database)
155+
.await?
156+
.ok_or(AuthError::NoToken)?
157+
.is_amzn_user();
158+
159+
let mut model_options = MODEL_OPTIONS.into_iter().collect::<Vec<_>>();
160+
if is_amzn_user {
161+
for opt in OPENAI_MODEL_OPTIONS {
162+
model_options.push(opt);
163+
}
164+
}
165+
166+
Ok(model_options)
167+
}
168+
169+
/// Returns the context window length in tokens for the given model_id.
170+
pub fn context_window_tokens(model_id: Option<&str>) -> usize {
171+
const DEFAULT_CONTEXT_WINDOW_LENGTH: usize = 200_000;
172+
173+
let Some(model_id) = model_id else {
174+
return DEFAULT_CONTEXT_WINDOW_LENGTH;
175+
};
176+
177+
MODEL_OPTIONS
178+
.iter()
179+
.chain(OPENAI_MODEL_OPTIONS.iter())
180+
.find(|m| m.model_id == model_id)
181+
.map_or(DEFAULT_CONTEXT_WINDOW_LENGTH, |m| m.context_window_tokens)
182+
}

crates/chat-cli/src/cli/chat/cli/usage.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crossterm::{
99
style,
1010
};
1111

12-
use crate::cli::chat::consts::CONTEXT_WINDOW_SIZE;
12+
use super::model::context_window_tokens;
1313
use crate::cli::chat::token_counter::{
1414
CharCount,
1515
TokenCount,
@@ -62,14 +62,16 @@ impl UsageArgs {
6262
// set a max width for the progress bar for better aesthetic
6363
let progress_bar_width = std::cmp::min(window_width, 80);
6464

65+
let context_window_size = context_window_tokens(session.conversation.model.as_deref());
66+
6567
let context_width =
66-
((context_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
68+
((context_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
6769
let assistant_width =
68-
((assistant_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
70+
((assistant_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
6971
let tools_width =
70-
((tools_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
72+
((tools_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
7173
let user_width =
72-
((user_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
74+
((user_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
7375

7476
let left_over_width = progress_bar_width
7577
- std::cmp::min(
@@ -85,15 +87,15 @@ impl UsageArgs {
8587
style::Print(format!(
8688
"\nCurrent context window ({} of {}k tokens used)\n",
8789
total_token_used,
88-
CONTEXT_WINDOW_SIZE / 1000
90+
context_window_size / 1000
8991
)),
9092
style::SetForegroundColor(Color::DarkRed),
9193
style::Print("█".repeat(progress_bar_width)),
9294
style::SetForegroundColor(Color::Reset),
9395
style::Print(" "),
9496
style::Print(format!(
9597
"{:.2}%",
96-
(total_token_used.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
98+
(total_token_used.value() as f32 / context_window_size as f32) * 100.0
9799
)),
98100
)?;
99101
} else {
@@ -102,7 +104,7 @@ impl UsageArgs {
102104
style::Print(format!(
103105
"\nCurrent context window ({} of {}k tokens used)\n",
104106
total_token_used,
105-
CONTEXT_WINDOW_SIZE / 1000
107+
context_window_size / 1000
106108
)),
107109
// Context files
108110
style::SetForegroundColor(Color::DarkCyan),
@@ -140,7 +142,7 @@ impl UsageArgs {
140142
style::SetForegroundColor(Color::Reset),
141143
style::Print(format!(
142144
"{:.2}%",
143-
(total_token_used.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
145+
(total_token_used.value() as f32 / context_window_size as f32) * 100.0
144146
)),
145147
)?;
146148
}
@@ -155,31 +157,31 @@ impl UsageArgs {
155157
style::Print(format!(
156158
"~{} tokens ({:.2}%)\n",
157159
context_token_count,
158-
(context_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
160+
(context_token_count.value() as f32 / context_window_size as f32) * 100.0
159161
)),
160162
style::SetForegroundColor(Color::DarkRed),
161163
style::Print("█ Tools: "),
162164
style::SetForegroundColor(Color::Reset),
163165
style::Print(format!(
164166
" ~{} tokens ({:.2}%)\n",
165167
tools_token_count,
166-
(tools_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
168+
(tools_token_count.value() as f32 / context_window_size as f32) * 100.0
167169
)),
168170
style::SetForegroundColor(Color::Blue),
169171
style::Print("█ Q responses: "),
170172
style::SetForegroundColor(Color::Reset),
171173
style::Print(format!(
172174
" ~{} tokens ({:.2}%)\n",
173175
assistant_token_count,
174-
(assistant_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
176+
(assistant_token_count.value() as f32 / context_window_size as f32) * 100.0
175177
)),
176178
style::SetForegroundColor(Color::Magenta),
177179
style::Print("█ Your prompts: "),
178180
style::SetForegroundColor(Color::Reset),
179181
style::Print(format!(
180182
" ~{} tokens ({:.2}%)\n\n",
181183
user_token_count,
182-
(user_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
184+
(user_token_count.value() as f32 / context_window_size as f32) * 100.0
183185
)),
184186
)?;
185187

crates/chat-cli/src/cli/chat/consts.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use super::token_counter::TokenCounter;
2-
31
// These limits are the internal undocumented values from the service for each item
42

53
pub const MAX_CURRENT_WORKING_DIRECTORY_LEN: usize = 256;
@@ -13,13 +11,6 @@ pub const MAX_TOOL_RESPONSE_SIZE: usize = 400_000;
1311
/// Actual service limit is 600_000
1412
pub const MAX_USER_MESSAGE_SIZE: usize = 400_000;
1513

16-
/// In tokens
17-
pub const CONTEXT_WINDOW_SIZE: usize = 200_000;
18-
19-
pub const CONTEXT_FILES_MAX_SIZE: usize = 150_000;
20-
21-
pub const MAX_CHARS: usize = TokenCounter::token_to_chars(CONTEXT_WINDOW_SIZE); // Character-based warning threshold
22-
2314
pub const DUMMY_TOOL_NAME: &str = "dummy";
2415

2516
pub const MAX_NUMBER_OF_IMAGES_PER_REQUEST: usize = 10;

crates/chat-cli/src/cli/chat/context.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use serde::{
1414
Serializer,
1515
};
1616

17-
use super::consts::CONTEXT_FILES_MAX_SIZE;
17+
use super::cli::model::context_window_tokens;
1818
use super::util::drop_matched_context_files;
1919
use crate::cli::agent::Agent;
2020
use crate::cli::agent::hook::{
@@ -103,7 +103,7 @@ pub struct ContextManager {
103103
}
104104

105105
impl ContextManager {
106-
pub fn from_agent(agent: &Agent, max_context_files_size: Option<usize>) -> Result<Self> {
106+
pub fn from_agent(agent: &Agent, max_context_files_size: usize) -> Result<Self> {
107107
let paths = agent
108108
.resources
109109
.iter()
@@ -112,7 +112,7 @@ impl ContextManager {
112112
.collect::<Vec<_>>();
113113

114114
Ok(Self {
115-
max_context_files_size: max_context_files_size.unwrap_or(CONTEXT_FILES_MAX_SIZE),
115+
max_context_files_size,
116116
current_profile: agent.name.clone(),
117117
paths,
118118
hooks: agent.hooks.clone(),
@@ -254,6 +254,12 @@ impl ContextManager {
254254
}
255255
}
256256

257+
/// Calculates the maximum context files size to use for the given model id.
258+
pub fn calc_max_context_files_size(model_id: Option<&str>) -> usize {
259+
// Sets the max as 75% of the context window
260+
context_window_tokens(model_id).saturating_mul(3) / 4
261+
}
262+
257263
/// Process a path, handling glob patterns and file types.
258264
///
259265
/// This method:
@@ -424,4 +430,13 @@ mod tests {
424430

425431
Ok(())
426432
}
433+
434+
#[test]
435+
fn test_calc_max_context_files_size() {
436+
assert_eq!(
437+
calc_max_context_files_size(Some("CLAUDE_SONNET_4_20250514_V1_0")),
438+
150_000
439+
);
440+
assert_eq!(calc_max_context_files_size(Some("OPENAI_GPT_OSS_120B_1_0")), 96_000);
441+
}
427442
}

0 commit comments

Comments (0)