
Commit e7ea8af

kiran-garre committed
chore: Merging changes from main
2 parents: 81b1dac + 754c4d5

File tree

28 files changed: +1101 -294 lines


Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ authors = ["Amazon Q CLI Team ([email protected])", "Chay Nabors (nabochay@amazon
 edition = "2024"
 homepage = "https://aws.amazon.com/q/"
 publish = false
-version = "1.13.2"
+version = "1.13.3"
 license = "MIT OR Apache-2.0"

 [workspace.dependencies]

crates/chat-cli/src/api_client/error.rs

Lines changed: 15 additions & 0 deletions
@@ -1,6 +1,7 @@
 use amzn_codewhisperer_client::operation::create_subscription_token::CreateSubscriptionTokenError;
 use amzn_codewhisperer_client::operation::generate_completions::GenerateCompletionsError;
 use amzn_codewhisperer_client::operation::list_available_customizations::ListAvailableCustomizationsError;
+use amzn_codewhisperer_client::operation::list_available_models::ListAvailableModelsError;
 use amzn_codewhisperer_client::operation::list_available_profiles::ListAvailableProfilesError;
 use amzn_codewhisperer_client::operation::send_telemetry_event::SendTelemetryEventError;
 pub use amzn_codewhisperer_streaming_client::operation::generate_assistant_response::GenerateAssistantResponseError;
@@ -93,6 +94,12 @@ pub enum ApiClientError {
     // Credential errors
     #[error("failed to load credentials: {}", .0)]
     Credentials(CredentialsError),
+
+    #[error(transparent)]
+    ListAvailableModelsError(#[from] SdkError<ListAvailableModelsError, HttpResponse>),
+
+    #[error("No default model found in the ListAvailableModels API response")]
+    DefaultModelNotFound,
 }

 impl ApiClientError {
@@ -116,6 +123,8 @@ impl ApiClientError {
             Self::ModelOverloadedError { status_code, .. } => *status_code,
             Self::MonthlyLimitReached { status_code } => *status_code,
             Self::Credentials(_e) => None,
+            Self::ListAvailableModelsError(e) => sdk_status_code(e),
+            Self::DefaultModelNotFound => None,
         }
     }
 }
@@ -141,6 +150,8 @@ impl ReasonCode for ApiClientError {
             Self::ModelOverloadedError { .. } => "ModelOverloadedError".to_string(),
             Self::MonthlyLimitReached { .. } => "MonthlyLimitReached".to_string(),
             Self::Credentials(_) => "CredentialsError".to_string(),
+            Self::ListAvailableModelsError(e) => sdk_error_code(e),
+            Self::DefaultModelNotFound => "DefaultModelNotFound".to_string(),
         }
     }
 }
@@ -188,6 +199,10 @@ mod tests {
                 ListAvailableCustomizationsError::unhandled("<unhandled>"),
                 response(),
             )),
+            ApiClientError::ListAvailableModelsError(SdkError::service_error(
+                ListAvailableModelsError::unhandled("<unhandled>"),
+                response(),
+            )),
             ApiClientError::ListAvailableServices(SdkError::service_error(
                 ListCustomizationsError::unhandled("<unhandled>"),
                 response(),
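
Note on the new error variants: the thiserror-style #[error(transparent)] attribute forwards the wrapped error's message and source, and #[from] generates the From impl that lets `?` convert the SDK error into ApiClientError automatically. A minimal standalone sketch of that pattern, using illustrative type names that are not taken from this crate:

// Sketch only: `thiserror` crate assumed as a dependency; names are hypothetical.
use thiserror::Error;

#[derive(Debug, Error)]
#[error("list available models failed: {0}")]
struct ListAvailableModelsError(String);

#[derive(Debug, Error)]
enum ApiError {
    // `transparent` delegates Display/source to the inner error;
    // `#[from]` generates `From<ListAvailableModelsError> for ApiError`.
    #[error(transparent)]
    ListAvailableModels(#[from] ListAvailableModelsError),

    #[error("No default model found in the ListAvailableModels API response")]
    DefaultModelNotFound,
}

fn list_models(fail: bool) -> Result<Vec<String>, ApiError> {
    if fail {
        // The generated From impl converts the inner error with `.into()`
        // (or implicitly via `?` when propagating a Result).
        return Err(ListAvailableModelsError("throttled".into()).into());
    }
    Ok(vec!["model-1".into()])
}

fn main() {
    println!("{:?}", list_models(false));
    println!("{:?}", list_models(true));
}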

crates/chat-cli/src/api_client/mod.rs

Lines changed: 102 additions & 2 deletions
@@ -5,14 +5,16 @@ mod error;
 pub mod model;
 mod opt_out;
 pub mod profile;
+mod retry_classifier;
 pub mod send_message_output;
-
 use std::sync::Arc;
 use std::time::Duration;

 use amzn_codewhisperer_client::Client as CodewhispererClient;
 use amzn_codewhisperer_client::operation::create_subscription_token::CreateSubscriptionTokenOutput;
+use amzn_codewhisperer_client::types::Origin::Cli;
 use amzn_codewhisperer_client::types::{
+    Model,
     OptOutPreference,
     SubscriptionStatus,
     TelemetryEvent,
@@ -32,6 +34,7 @@ pub use error::ApiClientError;
 use parking_lot::Mutex;
 pub use profile::list_available_profiles;
 use serde_json::Map;
+use tokio::sync::RwLock;
 use tracing::{
     debug,
     error,
@@ -66,13 +69,28 @@ pub const X_AMZN_CODEWHISPERER_OPT_OUT_HEADER: &str = "x-amzn-codewhisperer-opto
 // TODO(bskiser): confirm timeout is updated to an appropriate value?
 const DEFAULT_TIMEOUT_DURATION: Duration = Duration::from_secs(60 * 5);

+#[derive(Clone, Debug)]
+pub struct ModelListResult {
+    pub models: Vec<Model>,
+    pub default_model: Model,
+}
+
+impl From<ModelListResult> for (Vec<Model>, Model) {
+    fn from(v: ModelListResult) -> Self {
+        (v.models, v.default_model)
+    }
+}
+
+type ModelCache = Arc<RwLock<Option<ModelListResult>>>;
+
 #[derive(Clone, Debug)]
 pub struct ApiClient {
     client: CodewhispererClient,
     streaming_client: Option<CodewhispererStreamingClient>,
     sigv4_streaming_client: Option<QDeveloperStreamingClient>,
     mock_client: Option<Arc<Mutex<std::vec::IntoIter<Vec<ChatResponseStream>>>>>,
     profile: Option<AuthProfile>,
+    model_cache: ModelCache,
 }

 impl ApiClient {
@@ -112,6 +130,7 @@ impl ApiClient {
             sigv4_streaming_client: None,
             mock_client: None,
             profile: None,
+            model_cache: Arc::new(RwLock::new(None)),
         };

         if let Ok(json) = env.get("Q_MOCK_CHAT_RESPONSE") {
@@ -146,6 +165,7 @@ impl ApiClient {
                 .interceptor(UserAgentOverrideInterceptor::new())
                 .app_name(app_name())
                 .endpoint_url(endpoint.url())
+                .retry_classifier(retry_classifier::QCliRetryClassifier::new())
                 .stalled_stream_protection(stalled_stream_protection_config())
                 .build(),
         ));
@@ -159,6 +179,7 @@ impl ApiClient {
                 .bearer_token_resolver(BearerResolver)
                 .app_name(app_name())
                 .endpoint_url(endpoint.url())
+                .retry_classifier(retry_classifier::QCliRetryClassifier::new())
                 .stalled_stream_protection(stalled_stream_protection_config())
                 .build(),
         ));
@@ -179,6 +200,7 @@ impl ApiClient {
             sigv4_streaming_client,
             mock_client: None,
             profile,
+            model_cache: Arc::new(RwLock::new(None)),
         })
     }

@@ -232,6 +254,82 @@ impl ApiClient {
         Ok(profiles)
     }

+    pub async fn list_available_models(&self) -> Result<ModelListResult, ApiClientError> {
+        if cfg!(test) {
+            let m = Model::builder()
+                .model_id("model-1")
+                .description("Test Model 1")
+                .build()
+                .unwrap();
+
+            return Ok(ModelListResult {
+                models: vec![m.clone()],
+                default_model: m,
+            });
+        }
+
+        let mut models = Vec::new();
+        let mut default_model = None;
+        let request = self
+            .client
+            .list_available_models()
+            .set_origin(Some(Cli))
+            .set_profile_arn(self.profile.as_ref().map(|p| p.arn.clone()));
+        let mut paginator = request.into_paginator().send();
+
+        while let Some(result) = paginator.next().await {
+            let models_output = result?;
+            models.extend(models_output.models().iter().cloned());
+
+            if default_model.is_none() {
+                default_model = Some(models_output.default_model().clone());
+            }
+        }
+        let default_model = default_model.ok_or_else(|| ApiClientError::DefaultModelNotFound)?;
+        Ok(ModelListResult { models, default_model })
+    }
+
+    pub async fn list_available_models_cached(&self) -> Result<ModelListResult, ApiClientError> {
+        {
+            let cache = self.model_cache.read().await;
+            if let Some(cached) = cache.as_ref() {
+                tracing::debug!("Returning cached model list");
+                return Ok(cached.clone());
+            }
+        }
+
+        tracing::debug!("Cache miss, fetching models from list_available_models API");
+        let result = self.list_available_models().await?;
+        {
+            let mut cache = self.model_cache.write().await;
+            *cache = Some(result.clone());
+        }
+        Ok(result)
+    }
+
+    pub async fn invalidate_model_cache(&self) {
+        let mut cache = self.model_cache.write().await;
+        *cache = None;
+        tracing::info!("Model cache invalidated");
+    }
+
+    pub async fn get_available_models(&self, _region: &str) -> Result<ModelListResult, ApiClientError> {
+        let res = self.list_available_models_cached().await?;
+        // TODO: Once we have access to gpt-oss, add back.
+        // if region == "us-east-1" {
+        //     let gpt_oss = Model::builder()
+        //         .model_id("OPENAI_GPT_OSS_120B_1_0")
+        //         .model_name("openai-gpt-oss-120b-preview")
+        //         .token_limits(TokenLimits::builder().max_input_tokens(128_000).build())
+        //         .build()
+        //         .map_err(ApiClientError::from)?;
+
+        //     models.push(gpt_oss);
+        // }
+
+        Ok(res)
+    }
+
     pub async fn create_subscription_token(&self) -> Result<CreateSubscriptionTokenOutput, ApiClientError> {
         if cfg!(test) {
             return Ok(CreateSubscriptionTokenOutput::builder()
@@ -496,7 +594,9 @@ fn timeout_config(database: &Database) -> TimeoutConfig {
 }

 fn retry_config() -> RetryConfig {
-    RetryConfig::standard().with_max_attempts(1)
+    RetryConfig::adaptive()
+        .with_max_attempts(3)
+        .with_max_backoff(Duration::from_secs(10))
 }

 pub fn stalled_stream_protection_config() -> StalledStreamProtectionConfig {
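
Note on the caching change: list_available_models_cached uses a read-then-write tokio::sync::RwLock cache; a shared read lock serves the fast path, the read guard is dropped before the network fetch, and the fresh result is stored under a short write lock. A minimal standalone sketch of that pattern with illustrative stand-in types (not the crate's real ApiClient or Model):

// Sketch only: assumes `tokio` with the `macros` and `rt-multi-thread` features.
use std::sync::Arc;
use tokio::sync::RwLock;

// Illustrative stand-in for ModelListResult.
#[derive(Clone, Debug)]
struct Listing {
    models: Vec<String>,
    default_model: String,
}

#[derive(Clone)]
struct Client {
    cache: Arc<RwLock<Option<Listing>>>,
}

impl Client {
    async fn fetch(&self) -> Listing {
        // Stand-in for the paginated ListAvailableModels call.
        Listing {
            models: vec!["model-1".into()],
            default_model: "model-1".into(),
        }
    }

    async fn list_cached(&self) -> Listing {
        // Fast path: shared read lock, clone the cached value if present.
        {
            let cache = self.cache.read().await;
            if let Some(cached) = cache.as_ref() {
                return cached.clone();
            }
        } // read guard dropped before fetching or taking the write lock

        // Slow path: fetch, then populate the cache under a brief write lock.
        let fresh = self.fetch().await;
        *self.cache.write().await = Some(fresh.clone());
        fresh
    }
}

#[tokio::main]
async fn main() {
    let client = Client { cache: Arc::new(RwLock::new(None)) };
    let first = client.list_cached().await;  // cache miss: fetches and stores
    let second = client.list_cached().await; // cache hit: returns the clone
    println!("{first:?} / {second:?}");
}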

0 commit comments
