@@ -1398,6 +1398,7 @@ async def _chat_completions_code_assist(
13981398 processed_messages : list [Any ],
13991399 effective_model : str ,
14001400 _in_graceful_degradation : bool = False ,
1401+ _auth_retry_attempted : bool = False ,
14011402 ** kwargs : Any ,
14021403 ) -> ResponseEnvelope | StreamingResponseEnvelope :
14031404 """Handle chat completions using the Code Assist API.
@@ -1486,6 +1487,44 @@ async def _chat_completions_code_assist(
14861487 )
14871488
14881489 except AuthenticationError as e :
1490+ # Handle 401 authentication errors with token refresh and retry
1491+ if not _auth_retry_attempted :
1492+ logger .info (
1493+ "Received 401 Unauthorized in non-streaming request, attempting token refresh and retry..."
1494+ )
1495+ try :
1496+ # Use 30s timeout for refresh, leaving room for retry request
1497+ AUTH_RETRY_TIMEOUT = 30.0
1498+ refreshed = await asyncio .wait_for (
1499+ self ._refresh_token_if_needed (),
1500+ timeout = AUTH_RETRY_TIMEOUT ,
1501+ )
1502+ if refreshed :
1503+ logger .info (
1504+ "Token refresh successful, retrying non-streaming request..."
1505+ )
1506+ return await self ._chat_completions_code_assist (
1507+ request_data = request_data ,
1508+ processed_messages = processed_messages ,
1509+ effective_model = effective_model ,
1510+ _in_graceful_degradation = _in_graceful_degradation ,
1511+ _auth_retry_attempted = True , # Prevent infinite retry loops
1512+ ** kwargs ,
1513+ )
1514+ else :
1515+ logger .warning (
1516+ "Token refresh failed; will raise 401 error to caller"
1517+ )
1518+ except asyncio .TimeoutError :
1519+ logger .warning (
1520+ f"Token refresh timed out after { AUTH_RETRY_TIMEOUT } s; raising 401 to caller"
1521+ )
1522+ except Exception as refresh_error :
1523+ logger .error (
1524+ f"Error during token refresh attempt: { refresh_error } " ,
1525+ exc_info = True ,
1526+ )
1527+ # If we reach here, refresh failed or already retried - raise original error
14891528 logger .error (f"Authentication error during API call: { e } " , exc_info = True )
14901529 raise
14911530 except BackendError as e :
@@ -1561,6 +1600,7 @@ async def stream_generator(
15611600 * ,
15621601 _allow_tool_retry : bool = True ,
15631602 without_tools : bool = False ,
1603+ _auth_retry_attempted : bool = False ,
15641604 ) -> AsyncGenerator [ProcessedResponse , None ]:
15651605 import json
15661606
@@ -1727,9 +1767,54 @@ def _build_error_chunk(
17271767 code = "quota_exceeded"
17281768 elif response .status_code == 429 :
17291769 code = "rate_limit_exceeded"
1770+ elif response .status_code == 401 :
1771+ code = "auth_error"
17301772 elif isinstance (error_detail , str ) and error_detail .strip ():
17311773 error_message = error_detail
17321774
1775+ # Handle 401 authentication errors with token refresh and retry
1776+ if response .status_code == 401 and not _auth_retry_attempted :
1777+ logger .info (
1778+ "Received 401 Unauthorized from backend, attempting token refresh and retry..."
1779+ )
1780+ with contextlib .suppress (Exception ):
1781+ response .close ()
1782+
1783+ # Trigger proactive token refresh with timeout
1784+ # Use 30s timeout for refresh, leaving room for retry request
1785+ AUTH_RETRY_TIMEOUT = 30.0
1786+ try :
1787+ refreshed = await asyncio .wait_for (
1788+ self ._refresh_token_if_needed (),
1789+ timeout = AUTH_RETRY_TIMEOUT ,
1790+ )
1791+ if refreshed :
1792+ logger .info (
1793+ "Token refresh successful, retrying streaming request..."
1794+ )
1795+ # Recursively call stream_generator with retry flag set
1796+ async for retry_chunk in stream_generator (
1797+ _allow_tool_retry = _allow_tool_retry ,
1798+ without_tools = without_tools ,
1799+ _auth_retry_attempted = True , # Prevent infinite retry loops
1800+ ):
1801+ yield retry_chunk
1802+ return # Successfully handled via retry
1803+ else :
1804+ logger .warning (
1805+ "Token refresh failed; will return 401 error to client"
1806+ )
1807+ except asyncio .TimeoutError :
1808+ logger .warning (
1809+ f"Token refresh timed out after { AUTH_RETRY_TIMEOUT } s; returning 401 to client"
1810+ )
1811+ except Exception as refresh_error :
1812+ logger .error (
1813+ f"Error during token refresh attempt: { refresh_error } " ,
1814+ exc_info = True ,
1815+ )
1816+ # If we reach here, refresh failed - continue to raise error below
1817+
17331818 # Attach retry-after hint when available
17341819 retry_delay = None
17351820 if response .status_code == 429 :
0 commit comments