Add a cache for the available-model list in ApiClient

evanliu048 · evanliu048 · commit 3aff8595ae6a · 2025-07-28T17:11:50.000-07:00
diff --git a/crates/chat-cli/src/api_client/mod.rs b/crates/chat-cli/src/api_client/mod.rs
@@ -34,6 +34,7 @@ pub use error::ApiClientError;
 use parking_lot::Mutex;
 pub use profile::list_available_profiles;
 use serde_json::Map;
+use tokio::sync::RwLock;
 use tracing::{
     debug,
     error,
@@ -75,6 +76,7 @@ pub struct ApiClient {
     sigv4_streaming_client: Option<QDeveloperStreamingClient>,
     mock_client: Option<Arc<Mutex<std::vec::IntoIter<Vec<ChatResponseStream>>>>>,
     profile: Option<AuthProfile>,
+    model_cache: Arc<RwLock<Option<(Vec<Model>, Option<Model>)>>>,
 }
 
 impl ApiClient {
@@ -114,6 +116,7 @@ impl ApiClient {
                 sigv4_streaming_client: None,
                 mock_client: None,
                 profile: None,
+                model_cache: Arc::new(RwLock::new(None)),
             };
 
             if let Ok(json) = env.get("Q_MOCK_CHAT_RESPONSE") {
@@ -181,6 +184,7 @@ impl ApiClient {
             sigv4_streaming_client,
             mock_client: None,
             profile,
+            model_cache: Arc::new(RwLock::new(None)),
         })
     }
 
@@ -277,6 +281,30 @@ impl ApiClient {
         Ok((models, default_model))
     }
 
+    /// Returns the available models and the optional default model, preferring the cache.
+    /// A populated cache entry is cloned and returned without calling the service.
+    /// On a miss the result of `list_available_models` is stored and then returned;
+    /// an error from that call is propagated and leaves the cache untouched.
+    pub async fn list_available_models_cached(&self) -> Result<(Vec<Model>, Option<Model>), ApiClientError> {
+        // The read guard is a temporary of this `if let`, so it is released before any fetch.
+        if let Some(hit) = self.model_cache.read().await.as_ref().cloned() {
+            tracing::debug!("Returning cached model list");
+            return Ok(hit);
+        }
+
+        tracing::debug!("Cache miss, fetching models from list_available_models API");
+        let fetched = self.list_available_models().await?;
+        // The write guard only lives for the duration of this assignment.
+        *self.model_cache.write().await = Some(fetched.clone());
+        Ok(fetched)
+    }
+
+    /// Empties the model cache so the next `list_available_models_cached` call refetches.
+    pub async fn invalidate_model_cache(&self) {
+        *self.model_cache.write().await = None;
+        tracing::info!("Model cache invalidated");
+    }
+
     pub async fn create_subscription_token(&self) -> Result<CreateSubscriptionTokenOutput, ApiClientError> {
         if cfg!(test) {
             return Ok(CreateSubscriptionTokenOutput::builder()
diff --git a/crates/chat-cli/src/cli/chat/cli/model.rs b/crates/chat-cli/src/cli/chat/cli/model.rs
@@ -38,7 +38,7 @@ pub async fn select_model(os: &mut Os, session: &mut ChatSession) -> Result<Opti
     // Fetch available models from service
     let (models, _default_model) = os
         .client
-        .list_available_models()
+        .list_available_models_cached()
         .await
         .map_err(|e| ChatError::Custom(format!("Failed to fetch available models: {}", e).into()))?;
 
diff --git a/crates/chat-cli/src/cli/chat/mod.rs b/crates/chat-cli/src/cli/chat/mod.rs
@@ -289,7 +289,7 @@ impl ChatArgs {
 
         // If modelId is specified, verify it exists before starting the chat
         // Otherwise, CLI will use a default model when starting chat
-        let (models, default_model_opt) = os.client.list_available_models().await?;
+        let (models, default_model_opt) = os.client.list_available_models_cached().await?;
         let model_id: Option<String> = if let Some(requested) = self.model.as_ref() {
             let requested_lower = requested.to_lowercase();
             if let Some(m) = models
@@ -2385,6 +2385,7 @@ impl ChatSession {
     }
 
     async fn retry_model_overload(&mut self, os: &mut Os) -> Result<ChatState, ChatError> {
+        os.client.invalidate_model_cache().await;
         match select_model(os, self).await {
             Ok(Some(_)) => (),
             Ok(None) => {