
Commit 3619cef

feat(network): add retry with exponential backoff (R6) and rate limiting (R7) for LLM API calls
1 parent 3452605 commit 3619cef

9 files changed: +1346 -177 lines changed

ROADMAP.md

Lines changed: 10 additions & 3 deletions
```diff
@@ -285,8 +285,15 @@ graph TD
 - **Prerequisite**: Works best with A6 Pipeline architecture
 - **Note**: Requires A/B testing to ensure analysis accuracy ≥ 95%
 - [ ] **R5**: Frontend error boundaries - 🟡 Medium ⚠️ *Use dev branch*
-- [ ] **R6**: Network retry logic with exponential backoff - 🟢 Low
-- [ ] **R7**: Rate limiting for LLM API calls - 🟢 Low
+- [x] **R6**: Network retry logic with exponential backoff - 🟢 Low ✅ *Completed (2026-01-10)*
+  - Created `llm_client.py` with safe retry logic, idempotency keys, timeout budget
+  - Exponential backoff with jitter, Retry-After header support
+  - Pre-stream retry for SSE endpoints (no retry after streaming begins)
+- [x] **R7**: Rate limiting for LLM API calls - 🟢 Low ✅ *Completed (2026-01-10)*
+  - Created `rate_limiter.py` (RateLimiter + ConcurrencyLimiter)
+  - Core limiter (30/min) for `/doctor/analyze`, `/doctor/chat`
+  - Light limiter (10/min) for `/doctor/verify_key`, `/doctor/list_models`
+  - Concurrency semaphore (max 3 simultaneous LLM requests)
 - [x] **R11**: Fix validation error capture to collect all failures - 🟢 Low ✅ *Completed (2025-12-31)*
   - Modified logger to accumulate multiple "Failed to validate prompt" errors
   - Use "Executing prompt:" as completion marker instead of resetting buffer
@@ -711,7 +718,7 @@ graph TD
 - Testing: 159/159 Python tests passed; full Playwright suite 61/61 passed (latest)
 - i18n: Fully translated across all 9 languages
 - See `.planning/260104-F4_STATISTICS_RECORD.md` for implementation details
-- [ ] **R6-R7** Network reliability improvements
+- [x] **R6-R7** Network reliability improvements ✅ *Completed (2026-01-10)* (see §3.2 `R6`, `R7`)
 - [ ] **T2-T5** Comprehensive testing suite
 
 **Chat Interface Improvements**:
```
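
Of the nine files changed, `rate_limiter.py` is not rendered in this commit view. Below is a minimal sketch of what the R7 limiters described in the bullet list above could look like, assuming the `RateLimiter`/`ConcurrencyLimiter` class names from that list and a simple sliding-window design; the implementation details are assumptions, not code from this commit.

```python
# Hypothetical sketch of rate_limiter.py (not rendered in this commit):
# a sliding-window per-minute limiter plus an asyncio-semaphore concurrency cap.
import asyncio
import time
from collections import deque


class RateLimiter:
    """Sliding-window limiter: at most `max_per_minute` acquisitions per 60 s."""

    def __init__(self, max_per_minute: int) -> None:
        self.max_per_minute = max_per_minute
        self._timestamps: deque = deque()  # monotonic times of recent acquisitions
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Block until a slot in the current 60-second window is free."""
        while True:
            async with self._lock:
                now = time.monotonic()
                # Drop timestamps that have left the 60-second window
                while self._timestamps and now - self._timestamps[0] >= 60.0:
                    self._timestamps.popleft()
                if len(self._timestamps) < self.max_per_minute:
                    self._timestamps.append(now)
                    return
                # Wait until the oldest timestamp expires, then re-check
                wait = 60.0 - (now - self._timestamps[0])
            await asyncio.sleep(wait)


class ConcurrencyLimiter:
    """Async context manager capping simultaneous in-flight requests."""

    def __init__(self, max_concurrent: int) -> None:
        self._semaphore = asyncio.Semaphore(max_concurrent)

    async def __aenter__(self) -> "ConcurrencyLimiter":
        await self._semaphore.acquire()
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        self._semaphore.release()
```

Under these assumptions, the core limiter for `/doctor/analyze` and `/doctor/chat` would be `RateLimiter(30)`, the light limiter `RateLimiter(10)`, and the concurrency cap `ConcurrencyLimiter(3)`, matching the limits listed above.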

__init__.py

Lines changed: 272 additions & 148 deletions
Large diffs are not rendered by default.

config.py

Lines changed: 10 additions & 0 deletions
```diff
@@ -43,6 +43,16 @@ class DiagnosticsConfig:
     # Telemetry (S3)
     telemetry_enabled: bool = False  # Opt-in: disabled by default
 
+    # R7: Rate limiting
+    llm_core_rate_limit: int = 30   # req/min for analyze, chat
+    llm_light_rate_limit: int = 10  # req/min for verify_key, list_models
+    llm_max_concurrent: int = 3     # max simultaneous LLM requests
+
+    # R6: Retry configuration
+    llm_max_retries: int = 2
+    llm_request_timeout: float = 60.0  # per-attempt timeout (seconds)
+    llm_total_timeout: float = 180.0   # total operation timeout (seconds)
+
     def to_dict(self) -> dict:
         """Convert config to dictionary."""
         return asdict(self)
```
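
For orientation, here is one plausible way these new fields could be mapped onto the retry object defined in `llm_client.py` below. The `build_retry_config` helper is hypothetical; only the field names and defaults come from this diff.

```python
# Illustrative wiring, not code from this commit: map DiagnosticsConfig
# fields (shown in the diff above) onto RetryConfig from llm_client.py.
from llm_client import RetryConfig  # adjust import path to the package layout


def build_retry_config(cfg) -> RetryConfig:
    """Build a RetryConfig from a DiagnosticsConfig instance."""
    return RetryConfig(
        max_retries=cfg.llm_max_retries,                  # default 2
        request_timeout_seconds=cfg.llm_request_timeout,  # 60.0 s per attempt
        total_timeout_seconds=cfg.llm_total_timeout,      # 180.0 s total budget
    )
```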

llm_client.py

Lines changed: 286 additions & 0 deletions
```python
"""
LLM Client with retry logic, exponential backoff, and safety guards.

R6 Implementation: Network retry logic with exponential backoff.

Integrates with:
- outbound.py (SSRF protection, payload sanitization)
- session_manager.py (shared session, rate/concurrency limits)

IMPORTANT SAFETY NOTES:
- POST requests may cause duplicate charges if retried incorrectly
- Streaming requests should NOT be retried after connection established
- All requests use allow_redirects=False (SSRF protection)
"""

import asyncio
import aiohttp
import uuid
import time
import random
import logging
from dataclasses import dataclass
from typing import Optional, Tuple, Dict, Any

logger = logging.getLogger(__name__)


@dataclass
class RetryConfig:
    """Configuration for retry behavior."""

    # Retry limits
    max_retries: int = 2  # Conservative default

    # Timing
    base_delay: float = 1.0  # Initial delay (seconds)
    max_delay: float = 30.0  # Cap on delay
    exponential_base: float = 2.0  # Backoff multiplier

    # Timeouts
    request_timeout_seconds: float = 60.0  # Per-attempt timeout
    total_timeout_seconds: float = 180.0  # Entire operation timeout

    # Retryable conditions
    retryable_network_errors: bool = True  # ClientError (timeout, refused)
    retry_on_429: bool = True  # Rate limit (with Retry-After)
    retry_on_5xx: bool = False  # Disabled by default (risk of dupe)
    max_5xx_retries: int = 1  # If enabled, limit to 1
    retryable_5xx_codes: Tuple[int, ...] = (502, 503, 504)  # Exclude 500

    # Idempotency (OpenAI standard)
    add_idempotency_key: bool = True
    idempotency_header: str = "Idempotency-Key"


@dataclass
class RetryResult:
    """Result of a retry-aware request."""

    response: Optional[aiohttp.ClientResponse]
    success: bool
    attempts: int
    total_time: float
    error: Optional[str] = None
    idempotency_key: Optional[str] = None


def _safe_release(response: aiohttp.ClientResponse) -> None:
    """
    Safely release response resources.

    Note: response.close() is synchronous in aiohttp.
    This prevents connection pool leaks when retrying.
    """
    try:
        response.close()
    except Exception:
        pass


def _get_retry_after(
    response: aiohttp.ClientResponse,
    config: RetryConfig,
    attempt: int,
) -> float:
    """Parse Retry-After header or calculate backoff."""
    retry_after = response.headers.get("Retry-After")
    if retry_after:
        try:
            return min(float(retry_after), config.max_delay)
        except ValueError:
            pass
    return _calculate_backoff(attempt, config)


def _calculate_backoff(attempt: int, config: RetryConfig) -> float:
    """Calculate exponential backoff with jitter."""
    delay = config.base_delay * (config.exponential_base ** (attempt - 1))
    jitter = random.uniform(0, delay * 0.1)  # 10% jitter
    return min(delay + jitter, config.max_delay)


async def llm_request_with_retry(
    session: aiohttp.ClientSession,
    method: str,
    url: str,
    *,
    json: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    config: Optional[RetryConfig] = None,
    is_streaming: bool = False,
) -> RetryResult:
    """
    Make HTTP request with safe retry logic.

    Args:
        session: aiohttp ClientSession to use
        method: HTTP method (GET, POST, etc.)
        url: Target URL
        json: JSON payload (optional)
        headers: HTTP headers (optional)
        config: Retry configuration (optional, uses defaults)
        is_streaming: If True, marks this as a streaming request

    Returns:
        RetryResult with response, success status, and metadata

    IMPORTANT:
        - For streaming: retries are DISABLED after connection established
        - Caller must use `async with result.response:` for proper cleanup
        - All requests use allow_redirects=False (SSRF protection)

    Example:
        result = await llm_request_with_retry(session, "POST", url, json=payload)
        if result.success:
            async with result.response as resp:
                data = await resp.json()
    """
    config = config or RetryConfig()
    headers = dict(headers) if headers else {}

    # Add idempotency key for POST requests
    idempotency_key: Optional[str] = None
    if config.add_idempotency_key and method.upper() == "POST":
        idempotency_key = str(uuid.uuid4())
        headers[config.idempotency_header] = idempotency_key
        logger.debug(f"Added idempotency key: {idempotency_key}")

    start_time = time.monotonic()
    attempts = 0
    last_error: Optional[str] = None
    response: Optional[aiohttp.ClientResponse] = None

    while attempts <= config.max_retries:
        # Check total timeout budget
        elapsed = time.monotonic() - start_time
        if elapsed >= config.total_timeout_seconds:
            logger.warning(f"Total timeout exceeded ({config.total_timeout_seconds}s)")
            return RetryResult(
                response=None,
                success=False,
                attempts=attempts,
                total_time=elapsed,
                error=f"Total timeout exceeded ({config.total_timeout_seconds}s)",
                idempotency_key=idempotency_key,
            )

        attempts += 1
        remaining_time = config.total_timeout_seconds - elapsed
        per_request_timeout = min(config.request_timeout_seconds, remaining_time)

        try:
            timeout = aiohttp.ClientTimeout(total=per_request_timeout)
            response = await session.request(
                method,
                url,
                json=json,
                headers=headers,
                allow_redirects=False,  # SSRF protection (required)
                timeout=timeout,
            )

            # === Success path (2xx, 3xx) ===
            if response.status < 400:
                logger.debug(f"Request succeeded on attempt {attempts}")
                return RetryResult(
                    response=response,
                    success=True,
                    attempts=attempts,
                    total_time=time.monotonic() - start_time,
                    idempotency_key=idempotency_key,
                )

            # === 429 Rate Limit ===
            if response.status == 429 and config.retry_on_429:
                delay = _get_retry_after(response, config, attempts)
                _safe_release(response)
                if attempts <= config.max_retries:
                    logger.warning(
                        f"429 Rate Limited, waiting {delay:.1f}s "
                        f"(attempt {attempts}/{config.max_retries + 1})"
                    )
                    await asyncio.sleep(delay)
                    continue
                else:
                    return RetryResult(
                        response=None,
                        success=False,
                        attempts=attempts,
                        total_time=time.monotonic() - start_time,
                        error="Rate limit exceeded after max retries",
                        idempotency_key=idempotency_key,
                    )

            # === 5xx Server Error ===
            if response.status in config.retryable_5xx_codes and config.retry_on_5xx:
                if attempts <= config.max_5xx_retries:
                    delay = _calculate_backoff(attempts, config)
                    _safe_release(response)
                    logger.warning(
                        f"{response.status} Server Error, retry in {delay:.1f}s "
                        f"(attempt {attempts}/{config.max_5xx_retries + 1})"
                    )
                    await asyncio.sleep(delay)
                    continue

            # === Non-retryable status (4xx except 429, 500, or exhausted retries) ===
            logger.warning(f"Non-retryable status {response.status}")
            return RetryResult(
                response=response,
                success=False,
                attempts=attempts,
                total_time=time.monotonic() - start_time,
                error=f"HTTP {response.status}",
                idempotency_key=idempotency_key,
            )

        except asyncio.TimeoutError:
            last_error = f"Request timeout ({per_request_timeout:.1f}s)"
            logger.warning(f"Timeout on attempt {attempts}")
            if config.retryable_network_errors and attempts <= config.max_retries:
                delay = _calculate_backoff(attempts, config)
                logger.info(f"Retrying in {delay:.1f}s")
                await asyncio.sleep(delay)
                continue
            break

        except aiohttp.ClientError as e:
            last_error = str(e)
            logger.warning(f"Network error on attempt {attempts}: {e}")
            if config.retryable_network_errors and attempts <= config.max_retries:
                delay = _calculate_backoff(attempts, config)
                logger.info(f"Retrying in {delay:.1f}s")
                await asyncio.sleep(delay)
                continue
            break

    # Exhausted all retries
    logger.error(f"Request failed after {attempts} attempts: {last_error}")
    return RetryResult(
        response=None,  # Any response from a failed attempt was already released
        success=False,
        attempts=attempts,
        total_time=time.monotonic() - start_time,
        error=last_error,
        idempotency_key=idempotency_key,
    )


# Convenience function for non-streaming POST requests
async def llm_post_with_retry(
    session: aiohttp.ClientSession,
    url: str,
    *,
    json: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    config: Optional[RetryConfig] = None,
) -> RetryResult:
    """
    Convenience wrapper for POST requests with retry.

    See llm_request_with_retry for full documentation.
    """
    return await llm_request_with_retry(
        session, "POST", url,
        json=json, headers=headers, config=config, is_streaming=False
    )
```
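
A hypothetical call site combining the R7 limiters with this R6 helper might look like the following. Only `llm_post_with_retry` and the `RetryResult` fields come from the file above; the limiter objects and the endpoint URL are placeholders.

```python
# Hypothetical usage (assumed limiter names, placeholder URL):
# rate-limit, cap concurrency, then POST with retry and inspect the result.
import aiohttp


async def analyze(
    session: aiohttp.ClientSession,
    payload: dict,
    core_limiter,  # e.g. RateLimiter(30) -- assumed API
    concurrency,   # e.g. ConcurrencyLimiter(3) -- assumed API
) -> dict:
    await core_limiter.acquire()  # R7: 30 req/min core limiter
    async with concurrency:       # R7: max 3 simultaneous LLM requests
        result = await llm_post_with_retry(
            session,
            "https://api.example.com/v1/chat/completions",  # placeholder
            json=payload,
        )
    if not result.success:
        raise RuntimeError(
            f"LLM request failed after {result.attempts} attempts: {result.error}"
        )
    async with result.response as resp:  # caller owns response cleanup
        return await resp.json()
```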
