Skip to content

Commit 6cf0986

Browse files
Add error handling for LLM timeouts and rate limits
- Add try/except with logging to evaluation, assessment, and feedback agents
- Map timeouts to HTTP 504, rate limits to HTTP 429 in API endpoints
- Add error_type field to streaming SSE error events
- Sanitize error messages to avoid leaking internal details
- Add debug log for silent ACCEPT fallback in evaluation parsing
1 parent c89dcac commit 6cf0986

File tree

4 files changed

+107
-9
lines changed

4 files changed

+107
-9
lines changed

src/agents/assessment_agent.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,16 @@
44
elements or dimensions in the HED annotation.
55
"""
66

7+
import logging
78
from pathlib import Path
89

910
from langchain_core.language_models import BaseChatModel
1011
from langchain_core.messages import HumanMessage, SystemMessage
1112

1213
from src.agents.state import HedAnnotationState
1314

15+
logger = logging.getLogger(__name__)
16+
1417

1518
class AssessmentAgent:
1619
"""Agent that performs final assessment of HED annotations.
@@ -104,7 +107,11 @@ async def assess(self, state: HedAnnotationState) -> dict:
104107
HumanMessage(content=user_prompt),
105108
]
106109

107-
response = await self.llm.ainvoke(messages)
110+
try:
111+
response = await self.llm.ainvoke(messages)
112+
except Exception as e:
113+
logger.error("Assessment LLM invocation failed: %s", e, exc_info=True)
114+
raise
108115
content = response.content
109116
feedback = content.strip() if isinstance(content, str) else str(content)
110117

src/agents/evaluation_agent.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
the original natural language event description.
55
"""
66

7+
import logging
78
import re
89
from pathlib import Path
910

@@ -13,6 +14,8 @@
1314
from src.agents.state import HedAnnotationState
1415
from src.utils.json_schema_loader import HedJsonSchemaLoader, load_latest_schema
1516

17+
logger = logging.getLogger(__name__)
18+
1619

1720
class EvaluationAgent:
1821
"""Agent that evaluates the faithfulness of HED annotations.
@@ -164,7 +167,11 @@ async def evaluate(self, state: HedAnnotationState) -> dict:
164167
HumanMessage(content=user_prompt),
165168
]
166169

167-
response = await self.llm.ainvoke(messages)
170+
try:
171+
response = await self.llm.ainvoke(messages)
172+
except Exception as e:
173+
logger.error("Evaluation LLM invocation failed: %s", e, exc_info=True)
174+
raise
168175
content = response.content
169176
feedback = content.strip() if isinstance(content, str) else str(content)
170177

@@ -206,6 +213,9 @@ def _parse_decision(self, feedback: str) -> bool:
206213
return False
207214

208215
# Default to accept if ambiguous -- avoid unnecessary refinement loops
216+
logger.debug(
217+
"Evaluation parsing: no explicit DECISION/FAITHFUL/refine indicator found; defaulting to ACCEPT"
218+
)
209219
return True
210220

211221
def _check_tags_and_suggest(self, annotation: str) -> str:

src/agents/feedback_summarizer.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,15 @@
44
into concise, actionable points for the annotation agent.
55
"""
66

7+
import logging
8+
79
from langchain_core.language_models import BaseChatModel
810
from langchain_core.messages import HumanMessage, SystemMessage
911

1012
from src.agents.state import HedAnnotationState
1113

14+
logger = logging.getLogger(__name__)
15+
1216

1317
class FeedbackSummarizer:
1418
"""Agent that summarizes validation errors and feedback.
@@ -112,7 +116,11 @@ async def summarize(self, state: HedAnnotationState) -> dict:
112116
HumanMessage(content=user_prompt),
113117
]
114118

115-
response = await self.llm.ainvoke(messages)
119+
try:
120+
response = await self.llm.ainvoke(messages)
121+
except Exception as e:
122+
logger.error("Feedback summarization LLM invocation failed: %s", e, exc_info=True)
123+
raise
116124
content = response.content
117125
summarized_feedback = content.strip() if isinstance(content, str) else str(content)
118126

src/api/main.py

Lines changed: 79 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from fastapi.middleware.cors import CORSMiddleware
1919
from fastapi.responses import StreamingResponse
2020
from langchain_community.chat_models import ChatOllama
21+
from openai import APITimeoutError, RateLimitError
2122

2223
from src import __version__
2324
from src.agents.vision_agent import VisionAgent
@@ -699,10 +700,21 @@ async def annotate(
699700
status=status,
700701
)
701702

703+
except APITimeoutError as e:
704+
raise HTTPException(
705+
status_code=504,
706+
detail="LLM request timed out. Try again or use a faster model/provider.",
707+
) from e
708+
except RateLimitError as e:
709+
raise HTTPException(
710+
status_code=429,
711+
detail="LLM rate limit exceeded. Please wait and try again.",
712+
) from e
702713
except Exception as e:
714+
logging.exception("Annotation workflow failed")
703715
raise HTTPException(
704716
status_code=500,
705-
detail=f"Annotation workflow failed: {str(e)}",
717+
detail="An error occurred during annotation processing.",
706718
) from e
707719

708720

@@ -895,10 +907,21 @@ async def annotate_from_image(
895907
image_metadata=image_metadata,
896908
)
897909

910+
except APITimeoutError as e:
911+
raise HTTPException(
912+
status_code=504,
913+
detail="LLM request timed out. Try again or use a faster model/provider.",
914+
) from e
915+
except RateLimitError as e:
916+
raise HTTPException(
917+
status_code=429,
918+
detail="LLM rate limit exceeded. Please wait and try again.",
919+
) from e
898920
except Exception as e:
921+
logging.exception("Image annotation workflow failed")
899922
raise HTTPException(
900923
status_code=500,
901-
detail=f"Image annotation workflow failed: {str(e)}",
924+
detail="An error occurred during image annotation processing.",
902925
) from e
903926

904927

@@ -1110,10 +1133,35 @@ def send_event(event_type: str, data: dict) -> str:
11101133

11111134
except asyncio.CancelledError:
11121135
raise
1136+
except APITimeoutError:
1137+
logging.exception("Streaming workflow timeout")
1138+
yield send_event(
1139+
"error",
1140+
{
1141+
"message": "LLM request timed out. Try again or use a faster model/provider.",
1142+
"error_type": "timeout",
1143+
},
1144+
)
1145+
yield send_event("done", {"message": "Workflow ended with error"})
1146+
except RateLimitError:
1147+
logging.exception("Streaming workflow rate limit")
1148+
yield send_event(
1149+
"error",
1150+
{
1151+
"message": "LLM rate limit exceeded. Please wait and try again.",
1152+
"error_type": "rate_limit",
1153+
},
1154+
)
1155+
yield send_event("done", {"message": "Workflow ended with error"})
11131156
except Exception:
1114-
# Log the actual error for debugging, but return a generic message
11151157
logging.exception("Streaming workflow error")
1116-
yield send_event("error", {"message": "An error occurred during annotation processing"})
1158+
yield send_event(
1159+
"error",
1160+
{
1161+
"message": "An error occurred during annotation processing.",
1162+
"error_type": "internal",
1163+
},
1164+
)
11171165
yield send_event("done", {"message": "Workflow ended with error"})
11181166

11191167
return StreamingResponse(
@@ -1381,10 +1429,35 @@ def send_event(event_type: str, data: dict) -> str:
13811429

13821430
except asyncio.CancelledError:
13831431
raise
1432+
except APITimeoutError:
1433+
logging.exception("Streaming image workflow timeout")
1434+
yield send_event(
1435+
"error",
1436+
{
1437+
"message": "LLM request timed out. Try again or use a faster model/provider.",
1438+
"error_type": "timeout",
1439+
},
1440+
)
1441+
yield send_event("done", {"message": "Workflow ended with error"})
1442+
except RateLimitError:
1443+
logging.exception("Streaming image workflow rate limit")
1444+
yield send_event(
1445+
"error",
1446+
{
1447+
"message": "LLM rate limit exceeded. Please wait and try again.",
1448+
"error_type": "rate_limit",
1449+
},
1450+
)
1451+
yield send_event("done", {"message": "Workflow ended with error"})
13841452
except Exception:
1385-
# Log the actual error for debugging, but return a generic message
13861453
logging.exception("Streaming image annotation workflow error")
1387-
yield send_event("error", {"message": "An error occurred during image annotation"})
1454+
yield send_event(
1455+
"error",
1456+
{
1457+
"message": "An error occurred during image annotation processing.",
1458+
"error_type": "internal",
1459+
},
1460+
)
13881461
yield send_event("done", {"message": "Workflow ended with error"})
13891462

13901463
return StreamingResponse(

0 commit comments

Comments (0)