
Commit a20fbb8
Mateusz authored and committed
feat: Enhance connector resilience and error handling
- Improve Gemini connector error handling in _accumulate_streaming_response for non-streaming requests where backend errors weren't properly propagated
- Add Cline connector support for unwrapping 'data' envelope in non-streaming responses
- Fix Anthropic converter handling when usage is None to prevent AttributeError
- Enhance OpenAI connector debug logging for non-streaming response troubleshooting
- Improve response parser to serialize responses when 'choices' key is missing instead of returning empty content (affects embeddings API and similar)
- Add comprehensive tests for error handling scenarios and edge cases
1 parent 104430b commit a20fbb8

9 files changed: +1047 −500 lines changed
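Four of the nine file diffs are reproduced below. The response-parser change described in the commit message (serializing the response when 'choices' is missing, rather than returning empty content) is not among them, so the following is a purely hypothetical sketch of that fallback, with every name invented for illustration:

import json
from typing import Any

def extract_content(response_json: dict[str, Any]) -> str:
    """Hypothetical fallback: if there is no 'choices' key (e.g. an
    embeddings response), return the serialized payload instead of an
    empty string."""
    choices = response_json.get("choices")
    if not choices:
        # Returning "" here would silently drop the body; serializing
        # keeps the payload visible to the caller for debugging.
        return json.dumps(response_json)
    return choices[0].get("message", {}).get("content", "")

# An embeddings-style response has no 'choices' key at all.
embeddings_response = {"object": "list", "data": [{"embedding": [0.1, 0.2]}]}
print(extract_content(embeddings_response))  # serialized payload, not ""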

src/anthropic_converters.py

Lines changed: 2 additions & 2 deletions
@@ -256,7 +256,7 @@ def openai_to_anthropic_response(openai_response: Any) -> dict[str, Any]:
         # No choices and no explicit error - produce a message indicating
         # empty response. Use a clear message instead of empty string to
         # help debugging and prevent silent failures.
-        usage = oai_dict.get("usage", {})
+        usage = oai_dict.get("usage") or {}
         response = {
             "id": oai_dict.get("id", "msg_unk"),
             "type": "message",
@@ -282,7 +282,7 @@ def openai_to_anthropic_response(openai_response: Any) -> dict[str, Any]:
     choice = choices[0]
     message = choice.get("message", {})
     content_blocks = _build_content_blocks(choice, message)
-    usage = oai_dict.get("usage", {})
+    usage = oai_dict.get("usage") or {}

     # Map finish_reason to stop_reason
     finish_reason = choice.get("finish_reason")
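The `or {}` change guards against an explicit null: `dict.get(key, default)` only falls back to the default when the key is absent, so a backend payload containing `"usage": null` still yields `None`, and the later `usage.get(...)` raises AttributeError. A minimal standalone illustration (the payload is invented):

# Backend response that explicitly nulls out usage (invented payload).
oai_dict = {"id": "msg_123", "usage": None}

usage = oai_dict.get("usage", {})    # -> None: the key exists, default unused
# usage.get("input_tokens")          # would raise AttributeError

usage = oai_dict.get("usage") or {}  # -> {}: falsy None coerced to empty dict
print(usage.get("input_tokens", 0))  # -> 0, safe default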

src/connectors/cline.py

Lines changed: 92 additions & 0 deletions
@@ -140,6 +140,98 @@ async def initialize(self, **kwargs: Any) -> None:

         await super().initialize(**passthrough)

+    def _unwrap_cline_data_envelope(
+        self, response_json: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Unwrap Cline's non-standard 'data' envelope from responses.
+
+        Cline API wraps OpenAI-format responses in a 'data' key for non-streaming
+        requests. This method extracts the inner response to normalize it to
+        standard OpenAI format that the rest of the pipeline expects.
+        """
+        data_val = response_json.get("data")
+        if isinstance(data_val, dict):
+            # Only unwrap if the inner dict looks like a valid OpenAI response
+            if "choices" in data_val or "id" in data_val or "model" in data_val:
+                logger.debug(
+                    "Unwrapping Cline 'data' envelope - found keys: %s",
+                    list(data_val.keys())[:5],
+                )
+                return data_val
+        return response_json
+
+    async def _handle_non_streaming_response(
+        self,
+        url: str,
+        payload: dict[str, Any],
+        headers: dict[str, str] | None,
+        session_id: str,
+    ) -> ResponseEnvelope:
+        """
+        Override to handle Cline's non-standard response format.
+
+        Cline wraps responses in a 'data' envelope for non-streaming requests.
+        We unwrap this before passing to the parent handler.
+        """
+        from src.core.common.exceptions import ServiceUnavailableError
+        from src.core.security.loop_prevention import ensure_loop_guard_header
+
+        if not headers or not headers.get("Authorization"):
+            raise AuthenticationError(message="No auth credentials found")
+
+        guarded_headers = ensure_loop_guard_header(headers)
+
+        try:
+            response = await self.client.post(
+                url, json=payload, headers=guarded_headers
+            )
+        except httpx.RequestError as e:
+            logger.error(f"Cline request failed to {url}. Error: {e}")
+            raise ServiceUnavailableError(
+                message=f"Could not connect to Cline backend ({e})"
+            )
+
+        if int(response.status_code) >= 400:
+            try:
+                err = response.json()
+            except Exception:
+                err = response.text
+            raise HTTPException(status_code=response.status_code, detail=err)
+
+        response_json = response.json()
+
+        # Unwrap Cline's non-standard 'data' envelope
+        response_json = self._unwrap_cline_data_envelope(response_json)
+
+        # Debug log for troubleshooting
+        if logger.isEnabledFor(logging.DEBUG):
+            choices_count = len(response_json.get("choices", []))
+            response_id = response_json.get("id", "unknown")
+            response_model = response_json.get("model", "unknown")
+            logger.debug(
+                "Cline non-streaming response: id=%s model=%s choices_count=%d",
+                response_id,
+                response_model,
+                choices_count,
+            )
+
+        domain_response = self.translation_service.to_domain_response(
+            response_json, "openai"
+        )
+
+        try:
+            response_headers = dict(response.headers)
+        except Exception:
+            response_headers = {}
+
+        return ResponseEnvelope(
+            content=domain_response.model_dump(),
+            status_code=response.status_code,
+            headers=response_headers,
+            usage=domain_response.usage,
+        )
+
     async def chat_completions(
         self,
         request_data: DomainModel | InternalDTO | dict[str, Any],
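To see the unwrap heuristic in isolation, here is a standalone sketch of the same logic applied to sample payloads (the values are illustrative, not captured from the Cline API):

from typing import Any

def unwrap_data_envelope(response_json: dict[str, Any]) -> dict[str, Any]:
    """Standalone copy of the connector's unwrap heuristic above."""
    data_val = response_json.get("data")
    if isinstance(data_val, dict) and (
        "choices" in data_val or "id" in data_val or "model" in data_val
    ):
        return data_val
    return response_json

# Wrapped non-streaming response: the OpenAI-format body sits under 'data'.
wrapped = {
    "data": {
        "id": "chatcmpl-abc123",
        "model": "some-model",
        "choices": [{"message": {"role": "assistant", "content": "hi"}}],
    }
}
assert unwrap_data_envelope(wrapped)["id"] == "chatcmpl-abc123"

# A 'data' key that doesn't look like an OpenAI response is left untouched.
unrelated = {"data": {"rows": [1, 2, 3]}}
assert unwrap_data_envelope(unrelated) == unrelated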

src/connectors/gemini_base/connector.py

Lines changed: 48 additions & 1 deletion
@@ -1719,10 +1719,21 @@ async def _accumulate_streaming_response(
         finish_reason: str | None = None
         usage_data: dict[str, int] | None = None
         accumulated_reasoning: str = ""
+        error_data: dict[str, Any] | None = None

         def _process_openai_chunk(data: dict[str, Any]) -> None:
             """Process an OpenAI-style chunk and accumulate content."""
-            nonlocal accumulated_content, finish_reason, usage_data, accumulated_reasoning
+            nonlocal accumulated_content, finish_reason, usage_data, accumulated_reasoning, error_data
+
+            # Check for error in the chunk - this is critical for non-streaming
+            # requests where backend errors need to be properly propagated
+            if data.get("error"):
+                error_data = data.get("error")
+                # Also capture the finish_reason if present (usually "error")
+                choices = data.get("choices", [])
+                if choices and choices[0].get("finish_reason"):
+                    finish_reason = choices[0]["finish_reason"]
+                return

             choices = data.get("choices", [])
             if choices:
@@ -1830,6 +1841,42 @@ def _process_openai_chunk(data: dict[str, Any]) -> None:

         except Exception as e:
             logger.warning(f"Error accumulating streaming response: {e}", exc_info=True)
+            # Capture the exception as an error to propagate to the client
+            if error_data is None:
+                error_data = {
+                    "message": f"Error processing response: {e}",
+                    "type": "internal_error",
+                    "code": 500,
+                }
+
+        # If an error was encountered during streaming, return an error response.
+        # This is critical for non-streaming requests where the client waits for
+        # a complete response and needs to know about backend failures.
+        if error_data:
+            error_status_code = error_data.get("code", 500)
+            if isinstance(error_status_code, str):
+                try:
+                    error_status_code = int(error_status_code)
+                except ValueError:
+                    error_status_code = 500
+
+            error_response: dict[str, Any] = {
+                "id": f"chatcmpl-error-{uuid.uuid4().hex[:8]}",
+                "object": "chat.completion",
+                "created": int(time.time()),
+                "model": getattr(self, "backend_type", "gemini"),
+                "choices": [],
+                "error": error_data,
+            }
+            logger.warning(
+                f"Returning error response for non-streaming request: {error_data.get('message', 'Unknown error')}"
+            )
+            return ResponseEnvelope(
+                content=error_response,
+                headers=streaming_response.headers or {},
+                status_code=error_status_code,
+                usage=None,
+            )

         # Build OpenAI-style response
         message_content: dict[str, Any] = {
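The string-to-int coercion of the error code matters because some backends report "code" as a string. A small sketch of that path in isolation (the chunk shape follows the OpenAI streaming error convention; the values are invented):

from typing import Any

def extract_error_status(error_data: dict[str, Any]) -> int:
    """Mirrors the coercion above: accept int or numeric-string codes."""
    code = error_data.get("code", 500)
    if isinstance(code, str):
        try:
            code = int(code)
        except ValueError:
            code = 500
    return code

# Error chunk as a backend might emit it mid-stream (invented values).
chunk = {
    "error": {"message": "quota exceeded", "type": "rate_limit", "code": "429"},
    "choices": [{"finish_reason": "error"}],
}

assert extract_error_status(chunk["error"]) == 429      # numeric string coerced
assert extract_error_status({"code": 503}) == 503       # int passes through
assert extract_error_status({"code": "teapot"}) == 500  # junk falls back to 500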

src/connectors/openai.py

Lines changed: 20 additions & 1 deletion
@@ -632,8 +632,27 @@ async def _handle_non_streaming_response(
                 err = response.text
             raise HTTPException(status_code=response.status_code, detail=err)

+        response_json = response.json()
+        # Debug log raw response for non-streaming requests to help diagnose
+        # translation issues (e.g., Claude Code via Anthropic frontend)
+        if logger.isEnabledFor(logging.DEBUG):
+            choices_count = len(response_json.get("choices", []))
+            response_id = response_json.get("id", "unknown")
+            response_model = response_json.get("model", "unknown")
+            logger.debug(
+                "Non-streaming response from backend: id=%s model=%s choices_count=%d",
+                response_id,
+                response_model,
+                choices_count,
+            )
+            if choices_count == 0:
+                logger.debug(
+                    "Empty choices in non-streaming response - raw response: %s",
+                    str(response_json)[:500],
+                )
+
         domain_response = self.translation_service.to_domain_response(
-            response.json(), "openai"
+            response_json, "openai"
         )
         # Some tests use mocks that set response.headers to AsyncMock or
         # other non-dict types; defensively coerce to a dict and fall back
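A note on the logger.isEnabledFor(logging.DEBUG) guard used by both connectors: it skips building the log arguments (including str(response_json)[:500] on potentially large bodies) when DEBUG is off. A minimal self-contained sketch (the logger name is hypothetical):

import logging

logging.basicConfig(level=logging.DEBUG)  # enable DEBUG to see the diagnostics
logger = logging.getLogger("proxy.connectors.openai")  # hypothetical name

response_json: dict = {"id": "resp-1", "model": "m", "choices": []}

# Without the guard, str(response_json)[:500] would be computed even when
# DEBUG is disabled; with it, the work only happens if it will be logged.
if logger.isEnabledFor(logging.DEBUG):
    logger.debug(
        "Non-streaming response: id=%s choices=%d raw=%s",
        response_json.get("id", "unknown"),
        len(response_json.get("choices", [])),
        str(response_json)[:500],
    )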
