matdev83
diff --git a/‎src/core/services/backend_service.py‎
Lines changed: 5 additions & 52 deletions b/‎src/core/services/backend_service.py‎
Lines changed: 5 additions & 52 deletions
diff --git a/‎src/core/services/tool_call_handlers/pytest_context_saving_handler.py‎
Lines changed: 5 additions & 3 deletions b/‎src/core/services/tool_call_handlers/pytest_context_saving_handler.py‎
Lines changed: 5 additions & 3 deletions
@@ -102,8 +102,6 @@ def __init__(
         )
         # Registry for permanently disabled backends {backend_type: {reason, timestamp}}
         self._disabled_backends: dict[str, dict[str, Any]] = {}
-        # Per-backend exponential backoff after rate limit errors
-        self._rate_limit_backoff: dict[str, tuple[float, float]] = {}
         from src.core.config.app_config import AppConfig
         from src.core.services.failover_coordinator import FailoverCoordinator
 
@@ -1228,8 +1226,7 @@ async def call_completion(
                     ),
                 )
 
-        # Honor any active rate-limit backoff before proceeding (legacy)
-        await self._enforce_rate_limit_backoff(backend_type)
+        # Rate-limit backoff is now handled by the ResilienceCoordinator above
 
         rate_key = f"backend:{backend_type}"
         limit_info = await self._rate_limiter.check_limit(rate_key)
@@ -1840,12 +1837,9 @@ async def _inject_session_id() -> Any:
                 )
 
         except (BackendError, RateLimitExceededError, LLMProxyError) as exc:
-            # Record failure in resilience coordinator
+            # Record failure in resilience coordinator (handles cooldown/backoff)
             if self._resilience:
                 self._resilience.record_failure(backend_type, effective_model, exc)
-
-            if isinstance(exc, RateLimitExceededError):
-                await self._register_rate_limit_backoff(backend_type, exc)
             # Propagate expected exceptions as-is
             raise
         except Exception as e:
@@ -1874,50 +1868,9 @@ async def validate_backend_and_model(
                 )
             return False, f"Backend validation failed: {e!s}"
 
-    async def _enforce_rate_limit_backoff(self, backend_type: str) -> None:
-        """Delay if this backend is in a rate-limit backoff window."""
-        backoff = self._rate_limit_backoff.get(backend_type)
-        if not backoff:
-            return
-        wait_until, delay = backoff
-        remaining = wait_until - time.time()
-        if remaining > 0:
-            try:
-                await asyncio.sleep(min(remaining, delay))
-            except Exception:
-                if logger.isEnabledFor(logging.DEBUG):
-                    logger.debug(
-                        "Backoff sleep interrupted for backend %s",
-                        backend_type,
-                        exc_info=True,
-                    )
-        # If the window has passed, clear it
-        if time.time() >= wait_until:
-            self._rate_limit_backoff.pop(backend_type, None)
-
-    async def _register_rate_limit_backoff(
-        self, backend_type: str, error: RateLimitExceededError
-    ) -> None:
-        """Register exponential backoff after a 429 to avoid rapid retries."""
-        _, prev_delay = self._rate_limit_backoff.get(backend_type, (0.0, 0.0))
-        next_delay = prev_delay * 2 if prev_delay else 2.0
-        next_delay = min(next_delay, 60.0)
-
-        reset_at = getattr(error, "reset_at", None)
-        if reset_at and reset_at > time.time():
-            retry_after = reset_at - time.time()
-            next_delay = max(next_delay, retry_after)
-
-        wait_until = time.time() + next_delay
-        self._rate_limit_backoff[backend_type] = (wait_until, next_delay)
-
-        if logger.isEnabledFor(logging.WARNING):
-            logger.warning(
-                "Rate limit hit for backend %s; backing off for %.1fs (next window until %.1f)",
-                backend_type,
-                next_delay,
-                wait_until,
-            )
+    # NOTE: The legacy rate-limit backoff methods (_enforce_rate_limit_backoff,
+    # _register_rate_limit_backoff) have been removed. Backoff after rate-limit
+    # errors is now handled by the ResilienceCoordinator.
 
     async def _get_or_create_backend(
         self, backend_type: str, session_id: str | None = None
 
@@ -49,8 +49,8 @@ def _extract_command(arguments: Any) -> str | None:
             parsed = json.loads(arguments)
             arguments = parsed
         except (ValueError, TypeError):
-            # Plain string
-            return arguments
+            # Plain string - type narrowing tells us it's still str
+            return str(arguments)
 
     # If dict, try common fields
     if isinstance(arguments, dict):
@@ -66,13 +66,15 @@ def _extract_command(arguments: Any) -> str | None:
                 sub = inner.get("command") or inner.get("cmd")
                 if isinstance(sub, str) and sub.strip():
                     return sub
-        # If args array provided, join into a single string
+        # If args is provided, join a list into one string or return a non-blank string as-is
         args = arguments.get("args")
         if isinstance(args, list) and args:
             try:
                 return " ".join(str(a) for a in args)
             except Exception:
                 return None
+        if isinstance(args, str) and args.strip():
+            return args
         return None
 
     # If list/tuple, join