@@ -7,6 +7,7 @@ use crate::llm::{
77use base64:: prelude:: * ;
88use google_cloud_aiplatform_v1 as vertexai;
99use google_cloud_gax:: exponential_backoff:: ExponentialBackoff ;
10+ use google_cloud_gax:: options:: RequestOptionsBuilder ;
1011use google_cloud_gax:: retry_policy:: { Aip194Strict , RetryPolicyExt } ;
1112use google_cloud_gax:: retry_throttler:: { AdaptiveThrottler , SharedRetryThrottler } ;
1213use serde_json:: Value ;
@@ -251,9 +252,6 @@ impl google_cloud_gax::retry_policy::RetryPolicy for CustomizedGoogleCloudRetryP
251252 ) -> google_cloud_gax:: retry_result:: RetryResult {
252253 use google_cloud_gax:: retry_result:: RetryResult ;
253254
254- if !state. idempotent {
255- return RetryResult :: Permanent ( error) ;
256- }
257255 if let Some ( status) = error. status ( ) {
258256 if status. code == google_cloud_gax:: error:: rpc:: Code :: ResourceExhausted {
259257 return RetryResult :: Continue ( error) ;
@@ -350,7 +348,8 @@ impl LlmGenerationClient for VertexAiClient {
350348 . client
351349 . generate_content ( )
352350 . set_model ( self . get_model_path ( request. model ) )
353- . set_contents ( contents) ;
351+ . set_contents ( contents)
352+ . with_idempotency ( true ) ;
354353 if let Some ( sys) = system_instruction {
355354 req = req. set_system_instruction ( sys) ;
356355 }
@@ -414,6 +413,7 @@ impl LlmEmbeddingClient for VertexAiClient {
414413 . set_endpoint ( self . get_model_path ( request. model ) )
415414 . set_instances ( instances)
416415 . set_parameters ( parameters)
416+ . with_idempotency ( true )
417417 . send ( )
418418 . await ?;
419419
0 commit comments