Skip to content

Commit 6cfe4cf

Browse files
authored
feat: add retry logic, and treat 5xx errors as throttling to enable adaptive rate-limiting (#2427)
1 parent 70d580f commit 6cfe4cf

File tree

2 files changed

+224
-1
lines changed

2 files changed

+224
-1
lines changed

crates/chat-cli/src/api_client/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mod error;
55
pub mod model;
66
mod opt_out;
77
pub mod profile;
8+
mod retry_classifier;
89
pub mod send_message_output;
910

1011
use std::sync::Arc;
@@ -147,6 +148,7 @@ impl ApiClient {
147148
.app_name(app_name())
148149
.endpoint_url(endpoint.url())
149150
.stalled_stream_protection(stalled_stream_protection_config())
151+
.retry_classifier(retry_classifier::QCliRetryClassifier::new())
150152
.build(),
151153
));
152154
},
@@ -160,6 +162,7 @@ impl ApiClient {
160162
.app_name(app_name())
161163
.endpoint_url(endpoint.url())
162164
.stalled_stream_protection(stalled_stream_protection_config())
165+
.retry_classifier(retry_classifier::QCliRetryClassifier::new())
163166
.build(),
164167
));
165168
},
@@ -496,7 +499,7 @@ fn timeout_config(database: &Database) -> TimeoutConfig {
496499
}
497500

498501
fn retry_config() -> RetryConfig {
499-
RetryConfig::standard().with_max_attempts(1)
502+
RetryConfig::adaptive().with_max_attempts(3)
500503
}
501504

502505
pub fn stalled_stream_protection_config() -> StalledStreamProtectionConfig {
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
use std::fmt;
2+
3+
use aws_smithy_runtime_api::client::interceptors::context::InterceptorContext;
4+
use aws_smithy_runtime_api::client::retries::classifiers::{
5+
ClassifyRetry,
6+
RetryAction,
7+
RetryClassifierPriority,
8+
};
9+
use tracing::debug;
10+
11+
/// Marker searched for in the body of a 429 response to detect a monthly limit exceeded error.
const MONTHLY_LIMIT_ERROR_MARKER: &str = "MONTHLY_REQUEST_COUNT";

/// Status codes that indicate service overload/unavailability and should be retried
/// (they are classified as throttling errors).
const SERVICE_OVERLOAD_STATUS_CODES: &[u16] = &[
    500, // Internal Server Error - server encountered an unexpected condition
    503, // Service Unavailable - server is temporarily overloaded or under maintenance
];
19+
20+
/// Custom retry classifier for Q CLI specific error handling.
///
/// This classifier handles two specific cases:
/// 1. Monthly limit reached errors (HTTP 429 whose body contains the monthly limit marker) -
///    classified as `RetryAction::RetryForbidden`
/// 2. Service overload errors (HTTP 500 and 503) - classified as a throttling error
///
/// Any other response results in `RetryAction::NoActionIndicated`.
#[derive(Debug, Default)]
pub struct QCliRetryClassifier;
27+
28+
impl QCliRetryClassifier {
29+
pub fn new() -> Self {
30+
Self
31+
}
32+
33+
/// Return the priority of this retry classifier.
34+
///
35+
/// We want this to run after the standard classifiers but with high priority
36+
/// to override their decisions for our specific error cases.
37+
///
38+
/// # Returns
39+
/// A priority that runs after the transient error classifier but can override its decisions.
40+
pub fn priority() -> RetryClassifierPriority {
41+
RetryClassifierPriority::run_after(RetryClassifierPriority::transient_error_classifier())
42+
}
43+
44+
/// Check if the error indicates a monthly limit has been reached
45+
fn is_monthly_limit_error(ctx: &InterceptorContext) -> bool {
46+
let Some(resp) = ctx.response() else {
47+
debug!("QCliRetryClassifier: No response available in context");
48+
return false;
49+
};
50+
51+
// Check status code first - monthly limit errors typically return 429 (Too Many Requests)
52+
let status_code = resp.status().as_u16();
53+
if status_code != 429 {
54+
debug!(
55+
"QCliRetryClassifier: Status code {} is not 429, not a monthly limit error",
56+
status_code
57+
);
58+
return false;
59+
}
60+
61+
let Some(bytes) = resp.body().bytes() else {
62+
debug!("QCliRetryClassifier: No response body available");
63+
return false;
64+
};
65+
66+
match std::str::from_utf8(bytes) {
67+
Ok(body_str) => body_str.contains(MONTHLY_LIMIT_ERROR_MARKER),
68+
Err(e) => {
69+
debug!("QCliRetryClassifier: Failed to parse response body as UTF-8: {}", e);
70+
false
71+
},
72+
}
73+
}
74+
75+
/// Check if the error indicates a model is unavailable due to high load
76+
fn is_service_overloaded_error(ctx: &InterceptorContext) -> bool {
77+
let Some(resp) = ctx.response() else {
78+
debug!("QCliRetryClassifier: No response available for service overload check");
79+
return false;
80+
};
81+
82+
let status_code = resp.status().as_u16();
83+
SERVICE_OVERLOAD_STATUS_CODES.contains(&status_code)
84+
}
85+
}
86+
87+
impl ClassifyRetry for QCliRetryClassifier {
88+
fn classify_retry(&self, ctx: &InterceptorContext) -> RetryAction {
89+
// Check for monthly limit error first - this should never be retried
90+
if Self::is_monthly_limit_error(ctx) {
91+
debug!("QCliRetryClassifier: Monthly limit error detected - returning RetryForbidden");
92+
return RetryAction::RetryForbidden;
93+
}
94+
95+
// Check for service overloaded error - this should be treated as throttling
96+
if Self::is_service_overloaded_error(ctx) {
97+
debug!("QCliRetryClassifier: Service overloaded error detected - returning throttling_error");
98+
return RetryAction::throttling_error();
99+
}
100+
101+
// No specific action for other errors
102+
RetryAction::NoActionIndicated
103+
}
104+
105+
fn name(&self) -> &'static str {
106+
"Q CLI Custom Retry Classifier"
107+
}
108+
109+
fn priority(&self) -> RetryClassifierPriority {
110+
Self::priority()
111+
}
112+
}
113+
114+
impl fmt::Display for QCliRetryClassifier {
115+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
116+
write!(f, "QCliRetryClassifier")
117+
}
118+
}
119+
120+
#[cfg(test)]
mod tests {
    use aws_smithy_runtime_api::client::interceptors::context::{
        Input,
        InterceptorContext,
    };
    use aws_smithy_types::body::SdkBody;
    use http::Response;

    use super::*;

    /// Build an interceptor context whose response has the given status code and body.
    fn context_with_response(status: u16, body: &'static str) -> InterceptorContext {
        let mut ctx = InterceptorContext::new(Input::doesnt_matter());
        let response = Response::builder()
            .status(status)
            .body(body)
            .unwrap()
            .map(SdkBody::from);

        ctx.set_response(response.try_into().unwrap());
        ctx
    }

    #[test]
    fn test_monthly_limit_error_classification() {
        let classifier = QCliRetryClassifier::new();

        // A 429 response with MONTHLY_REQUEST_COUNT in the body must never be retried
        let ctx = context_with_response(429, r#"{"error": "MONTHLY_REQUEST_COUNT exceeded"}"#);

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::RetryForbidden);
    }

    #[test]
    fn test_429_without_marker_is_not_classified() {
        let classifier = QCliRetryClassifier::new();

        // A 429 response without the marker is left to the other classifiers
        let ctx = context_with_response(429, r#"{"error": "Too Many Requests"}"#);

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::NoActionIndicated);
    }

    #[test]
    fn test_service_overloaded_error_classification() {
        let classifier = QCliRetryClassifier::new();

        // A 500 response should be treated as service overloaded
        let ctx = context_with_response(500, "Internal Server Error");

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::throttling_error());
    }

    #[test]
    fn test_service_unavailable_error_classification() {
        let classifier = QCliRetryClassifier::new();

        // A 503 response should be treated as service overloaded
        let ctx = context_with_response(503, "Service Unavailable");

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::throttling_error());
    }

    #[test]
    fn test_no_action_for_non_overload_errors() {
        let classifier = QCliRetryClassifier::new();

        // A 400 response should not be treated as service overloaded
        let ctx = context_with_response(400, "Bad Request");

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::NoActionIndicated);
    }

    #[test]
    fn test_no_action_without_response() {
        let classifier = QCliRetryClassifier::new();

        // No response has been set on the context, so neither check can match
        let ctx = InterceptorContext::new(Input::doesnt_matter());

        let result = classifier.classify_retry(&ctx);
        assert_eq!(result, RetryAction::NoActionIndicated);
    }

    #[test]
    fn test_classifier_priority() {
        let priority = QCliRetryClassifier::priority();
        let transient_priority = RetryClassifierPriority::transient_error_classifier();

        // Our classifier should have higher priority than the transient error classifier
        assert!(priority > transient_priority);
    }

    #[test]
    fn test_classifier_name() {
        let classifier = QCliRetryClassifier::new();
        assert_eq!(classifier.name(), "Q CLI Custom Retry Classifier");
    }
}

0 commit comments

Comments
 (0)