Skip to content

Commit 7660ebe

Browse files
authored
fix: Handle INSUFFICIENT_MODEL_CAPACITY ThrottlingException as ModelOverloadedError (#2503)
* change model throttled error checking condition
* change model exception from sigv4 client
1 parent 29b5fa7 commit 7660ebe

File tree

1 file changed

+63
-63
lines changed
  • crates/chat-cli/src/api_client

1 file changed

+63
-63
lines changed

crates/chat-cli/src/api_client/mod.rs

Lines changed: 63 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -285,36 +285,34 @@ impl ApiClient {
285285
{
286286
Ok(response) => Ok(SendMessageOutput::Codewhisperer(response)),
287287
Err(err) => {
288-
use amzn_codewhisperer_streaming_client::operation::generate_assistant_response::GenerateAssistantResponseError::ThrottlingError as OperationThrottlingError;
289-
use amzn_codewhisperer_streaming_client::types::ThrottlingExceptionReason;
290-
use amzn_codewhisperer_streaming_client::types::error::ThrottlingError;
291-
292288
let status_code = err.raw_response().map(|res| res.status().as_u16());
293289
let is_quota_breach = status_code.is_some_and(|status| status == 429);
294290
let is_context_window_overflow = err.as_service_error().is_some_and(|err| {
295291
matches!(err, err if err.meta().code() == Some("ValidationException") && err.meta().message() == Some("Input is too long."))
296292
});
297293

298-
let is_model_unavailable =
299-
// Handling the updated error response
300-
err.as_service_error().is_some_and(|err| {
301-
matches!(
302-
err,
303-
OperationThrottlingError(ThrottlingError {
304-
reason: Some(ThrottlingExceptionReason::InsufficientModelCapacity),
305-
..
306-
})
307-
)
308-
})
309-
// Legacy error response
294+
let is_model_unavailable = {
295+
// check if ThrottlingException
296+
let is_throttling_exception = err
297+
.as_service_error()
298+
.is_some_and(|service_err| service_err.meta().code() == Some("ThrottlingException"));
299+
300+
// check if the response contains INSUFFICIENT_MODEL_CAPACITY
301+
let has_insufficient_capacity = err
302+
.raw_response()
303+
.and_then(|resp| resp.body().bytes())
304+
.and_then(|bytes| String::from_utf8(bytes.to_vec()).ok())
305+
.is_some_and(|body| body.contains("INSUFFICIENT_MODEL_CAPACITY"));
306+
307+
(is_throttling_exception && has_insufficient_capacity)
308+
// Legacy error response fallback
310309
|| (model_id_opt.is_some()
311-
&& status_code.is_some_and(|status| status == 500)
312-
&& err.as_service_error().is_some_and(|err| {
313-
err.meta().message()
314-
== Some(
315-
"Encountered unexpectedly high load when processing the request, please try again.",
316-
)
317-
}));
310+
&& status_code.is_some_and(|status| status == 500)
311+
&& err.as_service_error().is_some_and(|err| {
312+
err.meta().message() == Some(
313+
"Encountered unexpectedly high load when processing the request, please try again.",
314+
)}))
315+
};
318316

319317
let is_monthly_limit_err = err
320318
.raw_response()
@@ -325,17 +323,12 @@ impl ApiClient {
325323
})
326324
.unwrap_or(false);
327325

328-
if is_quota_breach {
329-
return Err(ApiClientError::QuotaBreach {
330-
message: "quota has reached its limit",
331-
status_code,
332-
});
333-
}
334-
335326
if is_context_window_overflow {
336327
return Err(ApiClientError::ContextWindowOverflow { status_code });
337328
}
338329

330+
// Both ModelOverloadedError and QuotaBreach return 429,
331+
// so check is_model_unavailable first.
339332
if is_model_unavailable {
340333
return Err(ApiClientError::ModelOverloadedError {
341334
request_id: err
@@ -346,6 +339,13 @@ impl ApiClient {
346339
});
347340
}
348341

342+
if is_quota_breach {
343+
return Err(ApiClientError::QuotaBreach {
344+
message: "quota has reached its limit",
345+
status_code,
346+
});
347+
}
348+
349349
if is_monthly_limit_err {
350350
return Err(ApiClientError::MonthlyLimitReached { status_code });
351351
}
@@ -377,36 +377,34 @@ impl ApiClient {
377377
{
378378
Ok(response) => Ok(SendMessageOutput::QDeveloper(response)),
379379
Err(err) => {
380-
use amzn_qdeveloper_streaming_client::operation::send_message::SendMessageError::ThrottlingError as OperationThrottlingError;
381-
use amzn_qdeveloper_streaming_client::types::ThrottlingExceptionReason;
382-
use amzn_qdeveloper_streaming_client::types::error::ThrottlingError;
383-
384380
let status_code = err.raw_response().map(|res| res.status().as_u16());
385381
let is_quota_breach = status_code.is_some_and(|status| status == 429);
386382
let is_context_window_overflow = err.as_service_error().is_some_and(|err| {
387383
matches!(err, err if err.meta().code() == Some("ValidationException") && err.meta().message() == Some("Input is too long."))
388384
});
389385

390-
let is_model_unavailable =
391-
// Handling the updated error response
392-
err.as_service_error().is_some_and(|err| {
393-
matches!(
394-
err,
395-
OperationThrottlingError(ThrottlingError {
396-
reason: Some(ThrottlingExceptionReason::InsufficientModelCapacity),
397-
..
398-
})
399-
)
400-
})
401-
// Legacy error response
386+
let is_model_unavailable = {
387+
// check if ThrottlingException
388+
let is_throttling_exception = err
389+
.as_service_error()
390+
.is_some_and(|service_err| service_err.meta().code() == Some("ThrottlingException"));
391+
392+
// check if the response contains INSUFFICIENT_MODEL_CAPACITY
393+
let has_insufficient_capacity = err
394+
.raw_response()
395+
.and_then(|resp| resp.body().bytes())
396+
.and_then(|bytes| String::from_utf8(bytes.to_vec()).ok())
397+
.is_some_and(|body| body.contains("INSUFFICIENT_MODEL_CAPACITY"));
398+
399+
(is_throttling_exception && has_insufficient_capacity)
400+
// Legacy error response fallback
402401
|| (model_id_opt.is_some()
403-
&& status_code.is_some_and(|status| status == 500)
404-
&& err.as_service_error().is_some_and(|err| {
405-
err.meta().message()
406-
== Some(
407-
"Encountered unexpectedly high load when processing the request, please try again.",
408-
)
409-
}));
402+
&& status_code.is_some_and(|status| status == 500)
403+
&& err.as_service_error().is_some_and(|err| {
404+
err.meta().message() == Some(
405+
"Encountered unexpectedly high load when processing the request, please try again.",
406+
)}))
407+
};
410408

411409
let is_monthly_limit_err = err
412410
.raw_response()
@@ -417,6 +415,18 @@ impl ApiClient {
417415
})
418416
.unwrap_or(false);
419417

418+
// Both ModelOverloadedError and QuotaBreach return 429,
419+
// so check is_model_unavailable first.
420+
if is_model_unavailable {
421+
return Err(ApiClientError::ModelOverloadedError {
422+
request_id: err
423+
.as_service_error()
424+
.and_then(|err| err.meta().request_id())
425+
.map(|s| s.to_string()),
426+
status_code,
427+
});
428+
}
429+
420430
if is_quota_breach {
421431
return Err(ApiClientError::QuotaBreach {
422432
message: "quota has reached its limit",
@@ -428,16 +438,6 @@ impl ApiClient {
428438
return Err(ApiClientError::ContextWindowOverflow { status_code });
429439
}
430440

431-
if is_model_unavailable {
432-
return Err(ApiClientError::ModelOverloadedError {
433-
request_id: err
434-
.as_service_error()
435-
.and_then(|err| err.meta().request_id())
436-
.map(|s| s.to_string()),
437-
status_code,
438-
});
439-
}
440-
441441
if is_monthly_limit_err {
442442
return Err(ApiClientError::MonthlyLimitReached { status_code });
443443
}

0 commit comments

Comments
 (0)