cocoindex-io
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 46 additions & 35 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 2 additions & 1 deletion
diff --git a/src/execution/live_updater.rs b/src/execution/live_updater.rs
Lines changed: 1 addition & 1 deletion
diff --git a/src/llm/gemini.rs b/src/llm/gemini.rs
Lines changed: 39 additions & 1 deletion
@@ -133,9 +133,10 @@ time = { version = "0.3", features = ["macros", "serde"] }
 numpy = "0.25.0"
 infer = "0.19.0"
 serde_with = { version = "3.14.0", features = ["base64"] }
-google-cloud-aiplatform-v1 = { version = "0.4.4", default-features = false, features = [
+google-cloud-aiplatform-v1 = { version = "0.4.5", default-features = false, features = [
     "prediction-service",
 ] }
+google-cloud-gax = "0.24.0"
 
 azure_identity = { version = "0.21.0", default-features = false, features = [
     "enable_reqwest_rustls",
 
@@ -144,7 +144,7 @@ impl SourceUpdateTask {
                         async move {
                             let mut change_stream = change_stream;
                             let retry_options = retryable::RetryOptions {
-                                max_retries: None,
+                                retry_timeout: std::time::Duration::from_secs(365 * 24 * 60 * 60),
                                 initial_backoff: std::time::Duration::from_secs(5),
                                 max_backoff: std::time::Duration::from_secs(60),
                             };
 
@@ -6,6 +6,10 @@ use crate::llm::{
 };
 use base64::prelude::*;
 use google_cloud_aiplatform_v1 as vertexai;
+use google_cloud_gax::exponential_backoff::ExponentialBackoff;
+use google_cloud_gax::options::RequestOptionsBuilder;
+use google_cloud_gax::retry_policy::{Aip194Strict, RetryPolicyExt};
+use google_cloud_gax::retry_throttler::{AdaptiveThrottler, SharedRetryThrottler};
 use serde_json::Value;
 use urlencoding::encode;
 
@@ -237,6 +241,33 @@ pub struct VertexAiClient {
     config: super::VertexAiConfig,
 }
 
+#[derive(Debug)]
+struct CustomizedGoogleCloudRetryPolicy; // Field-less marker type; its `RetryPolicy` impl decides which errors are retried.
+
+impl google_cloud_gax::retry_policy::RetryPolicy for CustomizedGoogleCloudRetryPolicy {
+    fn on_error( // Returns `Continue` for rate-limit style errors; every other error is classified by `Aip194Strict`.
+        &self,
+        state: &google_cloud_gax::retry_state::RetryState, // not inspected here, only forwarded to `Aip194Strict`
+        error: google_cloud_gax::error::Error, // consumed: moved into `Continue` or into the delegate call
+    ) -> google_cloud_gax::retry_result::RetryResult {
+        use google_cloud_gax::retry_result::RetryResult;
+
+        if let Some(status) = error.status() { // the error carries a status: look at its code
+            if status.code == google_cloud_gax::error::rpc::Code::ResourceExhausted {
+                return RetryResult::Continue(error); // ResourceExhausted => Continue
+            }
+        } else if let Some(code) = error.http_status_code() // reached only when `error.status()` is `None`
+            && code == reqwest::StatusCode::TOO_MANY_REQUESTS.as_u16()
+        {
+            return RetryResult::Continue(error); // bare HTTP TOO_MANY_REQUESTS => Continue
+        }
+        Aip194Strict.on_error(state, error) // anything not matched above is left to `Aip194Strict`
+    }
+}
+
+static SHARED_RETRY_THROTTLER: LazyLock<SharedRetryThrottler> = // one throttler instance, created lazily by `LazyLock`
+    LazyLock::new(|| Arc::new(Mutex::new(AdaptiveThrottler::new(2.0).unwrap()))); // NOTE(review): `unwrap` panics if `new(2.0)` returns an error; the meaning of 2.0 is not shown here — confirm against the gax docs
+
 impl VertexAiClient {
     pub async fn new(
         address: Option<String>,
@@ -249,6 +280,11 @@ impl VertexAiClient {
             api_bail!("VertexAi API config is required for VertexAi API type");
         };
         let client = vertexai::client::PredictionService::builder()
+            .with_retry_policy(
+                CustomizedGoogleCloudRetryPolicy.with_time_limit(retryable::DEFAULT_RETRY_TIMEOUT),
+            )
+            .with_backoff_policy(ExponentialBackoff::default())
+            .with_retry_throttler(SHARED_RETRY_THROTTLER.clone())
             .build()
             .await?;
         Ok(Self { client, config })
@@ -312,7 +348,8 @@ impl LlmGenerationClient for VertexAiClient {
             .client
             .generate_content()
             .set_model(self.get_model_path(request.model))
-            .set_contents(contents);
+            .set_contents(contents)
+            .with_idempotency(true);
         if let Some(sys) = system_instruction {
             req = req.set_system_instruction(sys);
         }
@@ -376,6 +413,7 @@ impl LlmEmbeddingClient for VertexAiClient {
             .set_endpoint(self.get_model_path(request.model))
             .set_instances(instances)
             .set_parameters(parameters)
+            .with_idempotency(true)
             .send()
             .await?;