
Commit 4bcd58e

combine api res and openai model
1 parent b0d6177 commit 4bcd58e

File tree: 7 files changed, +86 −102 lines

crates/chat-cli/src/api_client/mod.rs

Lines changed: 19 additions & 4 deletions

@@ -6,7 +6,6 @@ pub mod model;
 mod opt_out;
 pub mod profile;
 pub mod send_message_output;
-
 use std::sync::Arc;
 use std::time::Duration;
 
@@ -18,6 +17,7 @@ use amzn_codewhisperer_client::types::{
     OptOutPreference,
     SubscriptionStatus,
     TelemetryEvent,
+    TokenLimits,
     UserContext,
 };
 use amzn_codewhisperer_streaming_client::Client as CodewhispererStreamingClient;
@@ -275,9 +275,7 @@ impl ApiClient {
             models.extend(models_output.models().iter().cloned());
 
             if default_model.is_none() {
-                if let Some(model) = models_output.default_model().cloned() {
-                    default_model = Some(model);
-                }
+                default_model = Some(models_output.default_model().clone());
             }
         }
 
@@ -308,6 +306,23 @@ impl ApiClient {
         tracing::info!("Model cache invalidated");
     }
 
+    pub async fn get_available_models(&self, region: &str) -> Result<(Vec<Model>, Option<Model>), ApiClientError> {
+        let (mut models, default_model) = self.list_available_models_cached().await?;
+
+        if region == "us-east-1" {
+            let gpt_oss = Model::builder()
+                .model_id("OPENAI_GPT_OSS_120B_1_0")
+                .model_name("openai-gpt-oss-120b-preview")
+                .token_limits(TokenLimits::builder().max_input_tokens(128_000).build())
+                .build()
+                .map_err(ApiClientError::from)?;
+
+            models.push(gpt_oss);
+        }
+
+        Ok((models, default_model))
+    }
+
     pub async fn create_subscription_token(&self) -> Result<CreateSubscriptionTokenOutput, ApiClientError> {
         if cfg!(test) {
             return Ok(CreateSubscriptionTokenOutput::builder()
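
Note: ApiClient::get_available_models above merges the cached service listing with a hard-coded GPT-OSS preview entry that is only offered in us-east-1 (IAD). Below is a minimal standalone sketch of that gating logic; ModelInfo and with_region_extras are simplified stand-ins invented for illustration (the real code uses the amzn_codewhisperer_client Model/TokenLimits builders shown in the diff), while the ids and the 128,000-token limit come from the diff itself.

// Sketch of the region gating in get_available_models. ModelInfo is a
// simplified stand-in for the SDK's Model type, not the real API.
#[derive(Clone, Debug)]
struct ModelInfo {
    model_id: String,
    model_name: String,
    max_input_tokens: usize,
}

// Append the client-side GPT-OSS preview entry only for us-east-1 (IAD).
fn with_region_extras(mut models: Vec<ModelInfo>, region: &str) -> Vec<ModelInfo> {
    if region == "us-east-1" {
        models.push(ModelInfo {
            model_id: "OPENAI_GPT_OSS_120B_1_0".into(),
            model_name: "openai-gpt-oss-120b-preview".into(),
            max_input_tokens: 128_000,
        });
    }
    models
}

fn main() {
    let base = vec![ModelInfo {
        model_id: "CLAUDE_SONNET_4_20250514_V1_0".into(),
        model_name: "claude-4-sonnet".into(),
        max_input_tokens: 200_000,
    }];
    // The extra entry appears only in us-east-1.
    assert_eq!(with_region_extras(base.clone(), "us-east-1").len(), 2);
    assert_eq!(with_region_extras(base, "eu-west-1").len(), 1);
}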

crates/chat-cli/src/cli/chat/cli/context.rs

Lines changed: 2 additions & 1 deletion

@@ -222,7 +222,8 @@ impl ContextSubcommand {
             execute!(session.stderr, style::Print(format!("{}\n\n", "▔".repeat(3))),)?;
         }
 
-        let context_files_max_size = calc_max_context_files_size(session.conversation.model.as_deref());
+        let context_files_max_size =
+            calc_max_context_files_size(session.conversation.model.as_deref(), os).await;
         let mut files_as_vec = profile_context_files
             .iter()
             .map(|(path, content, _)| (path.clone(), content.clone()))

crates/chat-cli/src/cli/chat/cli/model.rs

Lines changed: 32 additions & 81 deletions

@@ -1,3 +1,4 @@
+use amzn_codewhisperer_client::types::Model;
 use clap::Args;
 use crossterm::style::{
     self,
@@ -20,35 +21,6 @@ use crate::cli::chat::{
     ChatState,
 };
 use crate::os::Os;
-
-pub struct ModelOption {
-    /// Display name
-    pub name: &'static str,
-    /// Actual model id to send in the API
-    pub model_id: &'static str,
-    /// Size of the model's context window, in tokens
-    pub context_window_tokens: usize,
-}
-
-const MODEL_OPTIONS: [ModelOption; 2] = [
-    ModelOption {
-        name: "claude-4-sonnet",
-        model_id: "CLAUDE_SONNET_4_20250514_V1_0",
-        context_window_tokens: 200_000,
-    },
-    ModelOption {
-        name: "claude-3.7-sonnet",
-        model_id: "CLAUDE_3_7_SONNET_20250219_V1_0",
-        context_window_tokens: 200_000,
-    },
-];
-
-const GPT_OSS_120B: ModelOption = ModelOption {
-    name: "openai-gpt-oss-120b-preview",
-    model_id: "OPENAI_GPT_OSS_120B_1_0",
-    context_window_tokens: 128_000,
-};
-
 #[deny(missing_docs)]
 #[derive(Debug, PartialEq, Args)]
 pub struct ModelArgs;
@@ -65,11 +37,7 @@ pub async fn select_model(os: &Os, session: &mut ChatSession) -> Result<Option<C
     queue!(session.stderr, style::Print("\n"))?;
 
     // Fetch available models from service
-    let (models, _default_model) = os
-        .client
-        .list_available_models_cached()
-        .await
-        .map_err(|e| ChatError::Custom(format!("Failed to fetch available models: {}", e).into()))?;
+    let (models, _default_model) = get_available_models(os).await?;
 
     if models.is_empty() {
         queue!(
@@ -82,15 +50,16 @@ pub async fn select_model(os: &Os, session: &mut ChatSession) -> Result<Option<C
     }
 
     let active_model_id = session.conversation.model.as_deref();
-    let model_options = get_model_options(os).await?;
 
-    let labels: Vec<String> = model_options
+    let labels: Vec<String> = models
         .iter()
         .map(|model| {
+            let display_name = model.model_name().unwrap_or(model.model_id());
+
             if Some(model.model_id()) == active_model_id {
-                format!("{} (active)", model.model_id())
+                format!("{} (active)", display_name)
             } else {
-                model.model_id().to_owned()
+                display_name.to_owned()
             }
         })
        .collect();
@@ -119,11 +88,12 @@ pub async fn select_model(os: &Os, session: &mut ChatSession) -> Result<Option<C
     let selected = &models[index];
     let model_id_str = selected.model_id.clone();
     session.conversation.model = Some(model_id_str.clone());
+    let display_name = selected.model_name().unwrap_or(selected.model_id());
 
     queue!(
         session.stderr,
         style::Print("\n"),
-        style::Print(format!(" Using {}\n\n", model_id_str)),
+        style::Print(format!(" Using {}\n\n", display_name)),
         style::ResetColor,
         style::SetForegroundColor(Color::Reset),
         style::SetBackgroundColor(Color::Reset),
@@ -160,60 +130,41 @@ pub async fn default_model_id(os: &Os) -> String {
     "claude-4-sonnet".to_string()
 }
 
-/// Returns the available models for use.
-pub async fn get_model_options(os: &Os) -> Result<Vec<ModelOption>, ChatError> {
-    let mut model_options = MODEL_OPTIONS.into_iter().collect::<Vec<_>>();
-
-    // GPT OSS is only accessible in IAD.
+/// Get available models with caching support
+pub async fn get_available_models(os: &Os) -> Result<(Vec<Model>, Option<Model>), ChatError> {
     let endpoint = Endpoint::configured_value(&os.database);
-    if endpoint.region().as_ref() != "us-east-1" {
-        return Ok(model_options);
-    }
+    let region = endpoint.region().as_ref();
 
-    model_options.push(GPT_OSS_120B);
-    Ok(model_options)
+    os.client
+        .get_available_models(region)
+        .await
+        .map_err(|e| ChatError::Custom(format!("Failed to fetch available models: {}", e).into()))
 }
 
 /// Returns the context window length in tokens for the given model_id.
-pub fn context_window_tokens(model_id: Option<&str>) -> usize {
+/// Uses cached model data when available
+pub async fn context_window_tokens(model_id: Option<&str>, os: &Os) -> usize {
     const DEFAULT_CONTEXT_WINDOW_LENGTH: usize = 200_000;
 
+    // If no model_id provided, return default
     let Some(model_id) = model_id else {
         return DEFAULT_CONTEXT_WINDOW_LENGTH;
     };
 
-    MODEL_OPTIONS
-        .iter()
-        .chain(std::iter::once(&GPT_OSS_120B))
-        .find(|m| m.model_id == model_id)
-        .map_or(DEFAULT_CONTEXT_WINDOW_LENGTH, |m| m.context_window_tokens)
-}
-
-/// Returns the available models for use.
-pub async fn get_model_options(os: &Os) -> Result<Vec<ModelOption>, ChatError> {
-    let mut model_options = MODEL_OPTIONS.into_iter().collect::<Vec<_>>();
-
-    // GPT OSS is only accessible in IAD.
-    let endpoint = Endpoint::configured_value(&os.database);
-    if endpoint.region().as_ref() != "us-east-1" {
-        return Ok(model_options);
-    }
-
-    model_options.push(GPT_OSS_120B);
-    Ok(model_options)
-}
-
-/// Returns the context window length in tokens for the given model_id.
-pub fn context_window_tokens(model_id: Option<&str>) -> usize {
-    const DEFAULT_CONTEXT_WINDOW_LENGTH: usize = 200_000;
-
-    let Some(model_id) = model_id else {
-        return DEFAULT_CONTEXT_WINDOW_LENGTH;
+    // Try to get from cached models first
+    let (models, _) = match get_available_models(os).await {
+        Ok(models) => models,
+        Err(_) => {
+            // If we can't get models, return default
+            return DEFAULT_CONTEXT_WINDOW_LENGTH;
+        },
     };
 
-    MODEL_OPTIONS
+    models
         .iter()
-        .chain(std::iter::once(&GPT_OSS_120B))
-        .find(|m| m.model_id == model_id)
-        .map_or(DEFAULT_CONTEXT_WINDOW_LENGTH, |m| m.context_window_tokens)
+        .find(|m| m.model_id() == model_id)
+        .and_then(|m| m.token_limits())
+        .and_then(|limits| limits.max_input_tokens())
+        .map(|tokens| tokens as usize)
+        .unwrap_or(DEFAULT_CONTEXT_WINDOW_LENGTH)
 }
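
Note: the rewritten context_window_tokens derives the window size from service-provided model metadata rather than the deleted MODEL_OPTIONS table, defaulting to 200_000 tokens when the model is unknown or reports no limits. A standalone sketch of that fallback chain, with Option-based stand-ins (Model and TokenLimits here are simplified for illustration, not the SDK types):

// Stand-ins for the SDK accessors used above, simplified for illustration.
const DEFAULT_CONTEXT_WINDOW_LENGTH: usize = 200_000;

struct TokenLimits { max_input_tokens: Option<i32> }
struct Model { model_id: String, token_limits: Option<TokenLimits> }

// Resolve the context window: find the model by id, read its max input
// tokens, and fall back to the default at every missing step.
fn context_window_tokens(model_id: Option<&str>, models: &[Model]) -> usize {
    let Some(model_id) = model_id else {
        return DEFAULT_CONTEXT_WINDOW_LENGTH;
    };
    models
        .iter()
        .find(|m| m.model_id == model_id)
        .and_then(|m| m.token_limits.as_ref())
        .and_then(|l| l.max_input_tokens)
        .map(|t| t as usize)
        .unwrap_or(DEFAULT_CONTEXT_WINDOW_LENGTH)
}

fn main() {
    let models = vec![Model {
        model_id: "OPENAI_GPT_OSS_120B_1_0".into(),
        token_limits: Some(TokenLimits { max_input_tokens: Some(128_000) }),
    }];
    assert_eq!(context_window_tokens(Some("OPENAI_GPT_OSS_120B_1_0"), &models), 128_000);
    assert_eq!(context_window_tokens(Some("unknown-model"), &models), 200_000);
    assert_eq!(context_window_tokens(None, &models), 200_000);
}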

crates/chat-cli/src/cli/chat/cli/usage.rs

Lines changed: 1 addition & 2 deletions

@@ -62,8 +62,7 @@ impl UsageArgs {
         // set a max width for the progress bar for better aesthetic
         let progress_bar_width = std::cmp::min(window_width, 80);
 
-        let context_window_size = context_window_tokens(session.conversation.model.as_deref());
-
+        let context_window_size = context_window_tokens(session.conversation.model.as_deref(), os).await;
         let context_width =
             ((context_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
         let assistant_width =
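
Note: only the window-size lookup became async here; the bar arithmetic is unchanged. A worked example with illustrative numbers (not taken from the commit): 30,000 context tokens against a 200,000-token window fill 15% of an 80-column bar, i.e. 12 columns.

// Worked example of the usage-bar arithmetic above; token counts are
// illustrative, not from the commit.
fn main() {
    let window_width = 120;
    let progress_bar_width = std::cmp::min(window_width, 80); // capped for aesthetics
    let context_token_count = 30_000_u64;
    let context_window_size = 200_000_usize; // e.g. claude-4-sonnet window
    let context_width =
        ((context_token_count as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
    assert_eq!(context_width, 12); // 15% of an 80-column bar
}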

crates/chat-cli/src/cli/chat/context.rs

Lines changed: 7 additions & 5 deletions

@@ -255,9 +255,9 @@ impl ContextManager {
 }
 
 /// Calculates the maximum context files size to use for the given model id.
-pub fn calc_max_context_files_size(model_id: Option<&str>) -> usize {
+pub async fn calc_max_context_files_size(model_id: Option<&str>, os: &Os) -> usize {
     // Sets the max as 75% of the context window
-    context_window_tokens(model_id).saturating_mul(3) / 4
+    context_window_tokens(model_id, os).await.saturating_mul(3) / 4
 }
 
 /// Process a path, handling glob patterns and file types.
@@ -432,11 +432,13 @@ mod tests {
     }
 
     #[test]
-    fn test_calc_max_context_files_size() {
+    async fn test_calc_max_context_files_size() {
+        let os = Os::new().await.unwrap();
+
         assert_eq!(
-            calc_max_context_files_size(Some("CLAUDE_SONNET_4_20250514_V1_0")),
+            calc_max_context_files_size(Some("CLAUDE_SONNET_4_20250514_V1_0"), os),
             150_000
         );
-        assert_eq!(calc_max_context_files_size(Some("OPENAI_GPT_OSS_120B_1_0")), 96_000);
+        assert_eq!(calc_max_context_files_size(Some("OPENAI_GPT_OSS_120B_1_0"), os), 96_000);
     }
 }
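
Note: the test expectations follow directly from the 75% rule in calc_max_context_files_size: 200_000 × 3 / 4 = 150_000 for claude-4-sonnet and 128_000 × 3 / 4 = 96_000 for the GPT-OSS preview. A self-contained check:

// The 75% rule behind the test expectations above.
fn calc_max_context_files_size(context_window_tokens: usize) -> usize {
    context_window_tokens.saturating_mul(3) / 4
}

fn main() {
    assert_eq!(calc_max_context_files_size(200_000), 150_000); // claude-4-sonnet
    assert_eq!(calc_max_context_files_size(128_000), 96_000); // openai-gpt-oss-120b-preview
}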

crates/chat-cli/src/cli/chat/conversation.rs

Lines changed: 19 additions & 4 deletions

@@ -118,9 +118,14 @@ impl ConversationState {
         tool_config: HashMap<String, ToolSpec>,
         tool_manager: ToolManager,
         current_model_id: Option<String>,
+        os: &Os,
     ) -> Self {
         let context_manager = if let Some(agent) = agents.get_active() {
-            ContextManager::from_agent(agent, calc_max_context_files_size(current_model_id.as_deref())).ok()
+            ContextManager::from_agent(
+                agent,
+                calc_max_context_files_size(current_model_id.as_deref(), os).await,
+            )
+            .ok()
         } else {
             None
         };
@@ -638,7 +643,7 @@ impl ConversationState {
     /// Get the current token warning level
     pub async fn get_token_warning_level(&mut self, os: &Os) -> Result<TokenWarningLevel, ChatError> {
         let total_chars = self.calculate_char_count(os).await?;
-        let max_chars = TokenCounter::token_to_chars(context_window_tokens(self.model.as_deref()));
+        let max_chars = TokenCounter::token_to_chars(context_window_tokens(self.model.as_deref(), os).await);
 
         Ok(if *total_chars >= max_chars {
             TokenWarningLevel::Critical
@@ -1061,6 +1066,7 @@ mod tests {
             tool_manager.load_tools(&mut os, &mut output).await.unwrap(),
             tool_manager,
             None,
+            os,
         )
         .await;
 
@@ -1092,6 +1098,7 @@ mod tests {
             tool_config.clone(),
             tool_manager.clone(),
             None,
+            os,
         )
         .await;
         conversation.set_next_user_message("start".to_string()).await;
@@ -1120,8 +1127,15 @@ mod tests {
         }
 
         // Build a long conversation history of user messages mixed in with tool results.
-        let mut conversation =
-            ConversationState::new("fake_conv_id", agents, tool_config.clone(), tool_manager.clone(), None).await;
+        let mut conversation = ConversationState::new(
+            "fake_conv_id",
+            agents,
+            tool_config.clone(),
+            tool_manager.clone(),
+            None,
+            os,
+        )
+        .await;
         conversation.set_next_user_message("start".to_string()).await;
         for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
             let s = conversation
@@ -1173,6 +1187,7 @@ mod tests {
             tool_manager.load_tools(&mut os, &mut output).await.unwrap(),
             tool_manager,
            None,
+            os,
         )
         .await;
 
crates/chat-cli/src/cli/chat/mod.rs

Lines changed: 6 additions & 5 deletions

@@ -42,7 +42,7 @@ use clap::{
 };
 use cli::compact::CompactStrategy;
 use cli::model::{
-    get_model_options,
+    get_available_models,
     select_model,
 };
 pub use conversation::ConversationState;
@@ -629,7 +629,7 @@ impl ChatSession {
                 cs.enforce_tool_use_history_invariants();
                 cs
             },
-            false => ConversationState::new(conversation_id, agents, tool_config, tool_manager, model_id).await,
+            false => ConversationState::new(conversation_id, agents, tool_config, tool_manager, model_id, os).await,
         };
 
         // Spawn a task for listening and broadcasting sigints.
@@ -1185,12 +1185,13 @@ impl ChatSession {
         self.stderr.flush()?;
 
         if let Some(ref id) = self.conversation.model {
-            let model_options = get_model_options(os).await?;
-            if let Some(model_option) = model_options.iter().find(|option| option.model_id == *id) {
+            let (models, _default_model) = get_available_models(os).await?;
+            if let Some(model_option) = models.iter().find(|option| option.model_id == *id) {
+                let display_name = model_option.model_name().unwrap_or_else(|| &model_option.model_id);
                 execute!(
                     self.stderr,
                     style::SetForegroundColor(Color::Cyan),
-                    style::Print(format!("🤖 You are chatting with {}\n", model_option.name)),
+                    style::Print(format!("🤖 You are chatting with {}\n", display_name)),
                     style::SetForegroundColor(Color::Reset),
                     style::Print("\n")
                 )?;
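
Note: both the model selector and this chat banner now prefer the service-supplied display name and fall back to the raw model id when no name is present. A minimal sketch of that fallback (the display_name helper is illustrative, not from the commit):

// Display-name fallback used by the selector and the chat banner: prefer
// the human-readable model_name, else show the raw model_id.
fn display_name<'a>(model_name: Option<&'a str>, model_id: &'a str) -> &'a str {
    model_name.unwrap_or(model_id)
}

fn main() {
    assert_eq!(
        display_name(Some("openai-gpt-oss-120b-preview"), "OPENAI_GPT_OSS_120B_1_0"),
        "openai-gpt-oss-120b-preview"
    );
    assert_eq!(
        display_name(None, "CLAUDE_3_7_SONNET_20250219_V1_0"),
        "CLAUDE_3_7_SONNET_20250219_V1_0"
    );
}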
