Skip to content

Commit 10f79f5

Browse files
authored
Fix and Simplify retry-classifier (#2563)
1 parent 8aacf67 commit 10f79f5

File tree

1 file changed

+28
-189
lines changed

1 file changed

+28
-189
lines changed

crates/chat-cli/src/api_client/retry_classifier.rs

Lines changed: 28 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,10 @@ use aws_smithy_runtime_api::client::retries::classifiers::{
88
};
99
use tracing::debug;
1010

11-
/// Error marker for monthly limit exceeded errors
1211
const MONTHLY_LIMIT_ERROR_MARKER: &str = "MONTHLY_REQUEST_COUNT";
13-
14-
/// Error message for high load conditions that should be retried
1512
const HIGH_LOAD_ERROR_MESSAGE: &str =
1613
"Encountered unexpectedly high load when processing the request, please try again.";
17-
18-
/// Error message for insufficient model capacity that should be retried
19-
const INSUFFICIENT_MODEL_CAPACITY_MESSAGE: &str = "I am experiencing high traffic, please try again shortly.";
20-
21-
/// Status codes that indicate service overload/unavailability and should be retried
22-
const SERVICE_OVERLOAD_STATUS_CODES: &[u16] = &[
23-
429, // Too Many Requests - throttling with insufficient model capacity
24-
500, // Internal Server Error - requires specific message check for high load conditions
25-
503, // Service Unavailable - server is temporarily overloaded or under maintenance
26-
];
14+
const SERVICE_UNAVAILABLE_EXCEPTION: &str = "ServiceUnavailableException";
2715

2816
#[derive(Debug, Default)]
2917
pub struct QCliRetryClassifier;
@@ -33,112 +21,58 @@ impl QCliRetryClassifier {
3321
Self
3422
}
3523

36-
/// Return the priority of this retry classifier.
37-
///
38-
/// We want this to run after the standard classifiers but with high priority
39-
/// to override their decisions for our specific error cases.
40-
///
41-
/// # Returns
42-
/// A priority that runs after the transient error classifier but can override its decisions.
4324
pub fn priority() -> RetryClassifierPriority {
4425
RetryClassifierPriority::run_after(RetryClassifierPriority::transient_error_classifier())
4526
}
4627

47-
/// Check if the error indicates a monthly limit has been reached
48-
fn is_monthly_limit_error(ctx: &InterceptorContext) -> bool {
49-
let Some(resp) = ctx.response() else {
50-
return false;
51-
};
52-
53-
// Check status code first - monthly limit errors typically return 429 (Too Many Requests)
54-
let status_code = resp.status().as_u16();
55-
if status_code != 429 {
56-
return false;
57-
}
58-
59-
let Some(bytes) = resp.body().bytes() else {
60-
return false;
61-
};
62-
63-
let is_monthly_limit = match std::str::from_utf8(bytes) {
64-
Ok(body_str) => body_str.contains(MONTHLY_LIMIT_ERROR_MARKER),
65-
Err(_) => false,
66-
};
28+
fn extract_response_body(ctx: &InterceptorContext) -> Option<&str> {
29+
let bytes = ctx.response()?.body().bytes()?;
30+
std::str::from_utf8(bytes).ok()
31+
}
6732

33+
fn is_monthly_limit_error(body_str: &str) -> bool {
34+
let is_monthly_limit = body_str.contains(MONTHLY_LIMIT_ERROR_MARKER);
6835
debug!(
6936
"QCliRetryClassifier: Monthly limit error detected: {}",
7037
is_monthly_limit
7138
);
7239
is_monthly_limit
7340
}
7441

75-
/// Check if the error indicates a model is unavailable due to high load
76-
fn is_service_overloaded_error(ctx: &InterceptorContext) -> bool {
42+
fn is_service_overloaded_error(ctx: &InterceptorContext, body_str: &str) -> bool {
7743
let Some(resp) = ctx.response() else {
7844
return false;
7945
};
8046

81-
let status_code = resp.status().as_u16();
82-
83-
// Fail fast: if status code is not in our list, return false immediately
84-
if !SERVICE_OVERLOAD_STATUS_CODES.contains(&status_code) {
47+
if resp.status().as_u16() != 500 {
8548
return false;
8649
}
8750

88-
let is_overloaded = match status_code {
89-
429 => {
90-
// For 429 errors, check if the response body contains the insufficient model capacity message
91-
let Some(bytes) = resp.body().bytes() else {
92-
return false;
93-
};
94-
95-
match std::str::from_utf8(bytes) {
96-
Ok(body_str) => body_str.contains(INSUFFICIENT_MODEL_CAPACITY_MESSAGE),
97-
Err(_) => false,
98-
}
99-
},
100-
500 => {
101-
// For 500 errors, check if the response body contains the specific high load message
102-
let Some(bytes) = resp.body().bytes() else {
103-
return false;
104-
};
105-
106-
match std::str::from_utf8(bytes) {
107-
Ok(body_str) => body_str.contains(HIGH_LOAD_ERROR_MESSAGE),
108-
Err(_) => false,
109-
}
110-
},
111-
503 => {
112-
// For 503 Service Unavailable, always retry (no additional checks needed)
113-
true
114-
},
115-
_ => {
116-
// This shouldn't happen given our fail-fast check above, but handle gracefully
117-
false
118-
},
119-
};
51+
let is_overloaded =
52+
body_str.contains(HIGH_LOAD_ERROR_MESSAGE) || body_str.contains(SERVICE_UNAVAILABLE_EXCEPTION);
12053

12154
debug!(
122-
"QCliRetryClassifier: Service overloaded error detected (status {}): {}",
123-
status_code, is_overloaded
55+
"QCliRetryClassifier: Service overloaded error detected (status 500): {}",
56+
is_overloaded
12457
);
12558
is_overloaded
12659
}
12760
}
12861

12962
impl ClassifyRetry for QCliRetryClassifier {
13063
fn classify_retry(&self, ctx: &InterceptorContext) -> RetryAction {
131-
// Check for monthly limit error first - this should never be retried
132-
if Self::is_monthly_limit_error(ctx) {
64+
let Some(body_str) = Self::extract_response_body(ctx) else {
65+
return RetryAction::NoActionIndicated;
66+
};
67+
68+
if Self::is_monthly_limit_error(body_str) {
13369
return RetryAction::RetryForbidden;
13470
}
13571

136-
// Check for service overloaded error - this should be treated as throttling
137-
if Self::is_service_overloaded_error(ctx) {
72+
if Self::is_service_overloaded_error(ctx, body_str) {
13873
return RetryAction::throttling_error();
13974
}
14075

141-
// No specific action for other errors
14276
RetryAction::NoActionIndicated
14377
}
14478

@@ -173,10 +107,9 @@ mod tests {
173107
let classifier = QCliRetryClassifier::new();
174108
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
175109

176-
// Create a response with MONTHLY_REQUEST_COUNT in the body
177-
let response_body = r#"{"error": "MONTHLY_REQUEST_COUNT exceeded"}"#;
110+
let response_body = r#"{"__type":"ThrottlingException","message":"Maximum Request reached for this month.","reason":"MONTHLY_REQUEST_COUNT"}"#;
178111
let response = Response::builder()
179-
.status(429)
112+
.status(400)
180113
.body(response_body)
181114
.unwrap()
182115
.map(SdkBody::from);
@@ -188,15 +121,13 @@ mod tests {
188121
}
189122

190123
#[test]
191-
fn test_insufficient_model_capacity_error_classification() {
124+
fn test_service_unavailable_exception_classification() {
192125
let classifier = QCliRetryClassifier::new();
193126
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
194127

195-
// Create a 429 response with the insufficient model capacity message - should be treated as service
196-
// overloaded
197-
let response_body = r#"{"error": "I am experiencing high traffic, please try again shortly."}"#;
128+
let response_body = r#"{"__type":"ServiceUnavailableException","message":"The service is temporarily unavailable. Please try again later."}"#;
198129
let response = Response::builder()
199-
.status(429)
130+
.status(500)
200131
.body(response_body)
201132
.unwrap()
202133
.map(SdkBody::from);
@@ -208,32 +139,10 @@ mod tests {
208139
}
209140

210141
#[test]
211-
fn test_429_error_without_insufficient_capacity_message_not_retried() {
212-
let classifier = QCliRetryClassifier::new();
213-
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
214-
215-
// Create a 429 response without the specific insufficient model capacity message - should NOT be
216-
// retried
217-
let response_body = "Too Many Requests - some other error";
218-
let response = Response::builder()
219-
.status(429)
220-
.body(response_body)
221-
.unwrap()
222-
.map(SdkBody::from);
223-
224-
ctx.set_response(response.try_into().unwrap());
225-
226-
let result = classifier.classify_retry(&ctx);
227-
assert_eq!(result, RetryAction::NoActionIndicated);
228-
}
229-
230-
#[test]
231-
fn test_service_overloaded_error_classification() {
142+
fn test_high_load_error_classification() {
232143
let classifier = QCliRetryClassifier::new();
233144
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
234145

235-
// Create a 500 response with the specific high load message - should be treated as service
236-
// overloaded
237146
let response_body =
238147
r#"{"error": "Encountered unexpectedly high load when processing the request, please try again."}"#;
239148
let response = Response::builder()
@@ -249,12 +158,11 @@ mod tests {
249158
}
250159

251160
#[test]
252-
fn test_500_error_without_high_load_message_not_retried() {
161+
fn test_500_error_without_specific_message_not_retried() {
253162
let classifier = QCliRetryClassifier::new();
254163
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
255164

256-
// Create a 500 response without the specific high load message - should NOT be retried
257-
let response_body = "Internal Server Error - some other error";
165+
let response_body = r#"{"__type":"InternalServerException","message":"Some other error"}"#;
258166
let response = Response::builder()
259167
.status(500)
260168
.body(response_body)
@@ -268,30 +176,10 @@ mod tests {
268176
}
269177

270178
#[test]
271-
fn test_service_unavailable_error_classification() {
179+
fn test_no_action_for_other_status_codes() {
272180
let classifier = QCliRetryClassifier::new();
273181
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
274182

275-
// Create a 503 response - should be treated as service overloaded
276-
let response_body = "Service Unavailable";
277-
let response = Response::builder()
278-
.status(503)
279-
.body(response_body)
280-
.unwrap()
281-
.map(SdkBody::from);
282-
283-
ctx.set_response(response.try_into().unwrap());
284-
285-
let result = classifier.classify_retry(&ctx);
286-
assert_eq!(result, RetryAction::throttling_error());
287-
}
288-
289-
#[test]
290-
fn test_no_action_for_non_overload_errors() {
291-
let classifier = QCliRetryClassifier::new();
292-
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
293-
294-
// Create a 400 response - should not be treated as service overloaded
295183
let response = Response::builder()
296184
.status(400)
297185
.body("Bad Request")
@@ -303,53 +191,4 @@ mod tests {
303191
let result = classifier.classify_retry(&ctx);
304192
assert_eq!(result, RetryAction::NoActionIndicated);
305193
}
306-
307-
#[test]
308-
fn test_fail_fast_for_non_service_overload_status_codes() {
309-
let classifier = QCliRetryClassifier::new();
310-
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
311-
312-
// Test various status codes that are not in SERVICE_OVERLOAD_STATUS_CODES
313-
let test_cases = vec![
314-
(200, "OK"),
315-
(400, "Bad Request"),
316-
(401, "Unauthorized"),
317-
(403, "Forbidden"),
318-
(404, "Not Found"),
319-
(502, "Bad Gateway"),
320-
];
321-
322-
for (status_code, body) in test_cases {
323-
let response = Response::builder()
324-
.status(status_code)
325-
.body(body)
326-
.unwrap()
327-
.map(SdkBody::from);
328-
329-
ctx.set_response(response.try_into().unwrap());
330-
331-
let result = classifier.classify_retry(&ctx);
332-
assert_eq!(
333-
result,
334-
RetryAction::NoActionIndicated,
335-
"Status code {} should return NoActionIndicated",
336-
status_code
337-
);
338-
}
339-
}
340-
341-
#[test]
342-
fn test_classifier_priority() {
343-
let priority = QCliRetryClassifier::priority();
344-
let transient_priority = RetryClassifierPriority::transient_error_classifier();
345-
346-
// Our classifier should have higher priority than the transient error classifier
347-
assert!(priority > transient_priority);
348-
}
349-
350-
#[test]
351-
fn test_classifier_name() {
352-
let classifier = QCliRetryClassifier::new();
353-
assert_eq!(classifier.name(), "Q CLI Custom Retry Classifier");
354-
}
355194
}

0 commit comments

Comments
 (0)