Azure-Samples
diff --git a/‎Agents/Infrastructure/AgentBase.cs‎
Lines changed: 82 additions & 0 deletions b/‎Agents/Infrastructure/AgentBase.cs‎
Lines changed: 82 additions & 0 deletions
@@ -170,6 +170,88 @@ protected async Task<string> ExecuteChatCompletionAsync(
                 var response = await ExecuteChatCompletionAsync(systemPrompt, userPrompt, contextIdentifier);
                 return (response, false, null);
             }
+            // ── Reasoning exhaustion catch (before transient error catch) ──
+            catch (ReasoningExhaustionException rex) when (UseResponsesApi && ResponsesClient != null)
+            {
+                var profile = ResponsesClient.Profile;
+                var maxExhaustionRetries = profile.ReasoningExhaustionMaxRetries;
+
+                Logger.LogWarning(
+                    "[{Agent}] Reasoning exhaustion for {Context}: {Message}",
+                    AgentName, contextIdentifier, rex.Message);
+
+                EnhancedLogger?.LogBehindTheScenes("REASONING_EXHAUSTION", "DETECTED",
+                    $"max_output_tokens={rex.MaxOutputTokens}, reasoning={rex.ReasoningTokens}, " +
+                    $"output={rex.ActualOutputTokens}, effort='{rex.ReasoningEffort}'", AgentName);
+
+                // Escalation loop: increase tokens and promote reasoning effort
+                var currentMaxTokens = rex.MaxOutputTokens;
+                var currentEffort = rex.ReasoningEffort;
+
+                for (int exhaustionRetry = 0; exhaustionRetry < maxExhaustionRetries; exhaustionRetry++)
+                {
+                    // Scale the output-token budget by the profile's retry multiplier (not necessarily 2x)
+                    currentMaxTokens = (int)(currentMaxTokens * profile.ReasoningExhaustionRetryMultiplier);
+                    // Cap at profile maximum
+                    currentMaxTokens = Math.Min(currentMaxTokens, profile.MaxOutputTokens);
+
+                    // Promote reasoning effort: low → medium → high
+                    if (currentEffort == profile.LowReasoningEffort && currentEffort != profile.MediumReasoningEffort)
+                        currentEffort = profile.MediumReasoningEffort;
+                    else if (currentEffort == profile.MediumReasoningEffort && currentEffort != profile.HighReasoningEffort)
+                        currentEffort = profile.HighReasoningEffort;
+
+                    // ACTION 3: Thrash guard — after the first retry, if already at max tokens AND max effort, don't burn another API call
+                    if (currentMaxTokens >= profile.MaxOutputTokens && currentEffort == profile.HighReasoningEffort
+                        && exhaustionRetry > 0)
+                    {
+                        Logger.LogError(
+                            "[{Agent}] Thrash guard: already at max tokens ({Tokens}) and max effort ('{Effort}') " +
+                            "for {Context}. Failing fast — further retries are hopeless.",
+                            AgentName, currentMaxTokens, currentEffort, contextIdentifier);
+
+                        EnhancedLogger?.LogBehindTheScenes("REASONING_EXHAUSTION", "THRASH_GUARD",
+                            $"Stopped retrying: tokens={currentMaxTokens} (max), effort='{currentEffort}' (max)", AgentName);
+
+                        break;
+                    }
+
+                    Logger.LogInformation(
+                        "[{Agent}] Reasoning exhaustion retry {Retry}/{MaxRetries} for {Context}: " +
+                        "max_output_tokens={Tokens}, effort='{Effort}'",
+                        AgentName, exhaustionRetry + 1, maxExhaustionRetries,
+                        contextIdentifier, currentMaxTokens, currentEffort);
+
+                    try
+                    {
+                        var retryResponse = await ResponsesClient.GetResponseAsync(
+                            systemPrompt, userPrompt, currentMaxTokens, currentEffort);
+
+                        EnhancedLogger?.LogBehindTheScenes("REASONING_EXHAUSTION_RECOVERED", "SUCCESS",
+                            $"Recovered on retry {exhaustionRetry + 1} with tokens={currentMaxTokens}, effort='{currentEffort}'",
+                            AgentName);
+
+                        ChatLogger?.LogAIResponse(AgentName, contextIdentifier, retryResponse);
+                        return (retryResponse, false, null);
+                    }
+                    catch (ReasoningExhaustionException)
+                    {
+                        // Still exhausted, continue escalation loop
+                        Logger.LogWarning(
+                            "[{Agent}] Still exhausted after retry {Retry} with tokens={Tokens}",
+                            AgentName, exhaustionRetry + 1, currentMaxTokens);
+                    }
+                }
+
+                // Escalation loop ended without a successful response (retries used up or thrash guard tripped)
+                lastException = rex;
+                Logger.LogError(
+                    "[{Agent}] All {MaxRetries} reasoning exhaustion retries failed for {Context}",
+                    AgentName, maxExhaustionRetries, contextIdentifier);
+
+                return (string.Empty, true, $"Reasoning exhaustion: all {maxExhaustionRetries} escalation retries failed");
+            }
+            // ── END Reasoning exhaustion ──
             catch (Exception ex) when (IsTransientError(ex) && attempt < maxRetries)
             {
                 lastException = ex;