matdev83
diff --git a/‎config/config.example.yaml‎
Lines changed: 7 additions & 0 deletions b/‎config/config.example.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎data/cli_flag_snapshot.txt‎
Lines changed: 2 additions & 0 deletions b/‎data/cli_flag_snapshot.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/user_guide/cli-parameters.md‎
Lines changed: 7 additions & 0 deletions b/‎docs/user_guide/cli-parameters.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/connectors/gemini_base/connector.py‎
Lines changed: 39 additions & 3 deletions b/‎src/connectors/gemini_base/connector.py‎
Lines changed: 39 additions & 3 deletions
diff --git a/‎src/core/cli.py‎
Lines changed: 40 additions & 0 deletions b/‎src/core/cli.py‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/core/config/app_config.py‎
Lines changed: 22 additions & 0 deletions b/‎src/core/config/app_config.py‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎src/core/di/services.py‎
Lines changed: 43 additions & 0 deletions b/‎src/core/di/services.py‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎src/core/domain/configuration/compaction_config.py‎
Lines changed: 3 additions & 3 deletions b/‎src/core/domain/configuration/compaction_config.py‎
Lines changed: 3 additions & 3 deletions
@@ -106,6 +106,13 @@ session:
   force_reprocess_tool_calls: false
   log_skipped_tool_calls: false
 
+# History Compaction
+# Reduces context size by compacting stale tool outputs in long conversations.
+# This helps maintain performance and stay within context limits.
+compaction:
+  enabled: false  # Default: false. Set to true to enable.
+  token_threshold: 100000  # Minimum tokens required to trigger compaction. Default: 100,000.
+
 # Logging
 logging:
   level: "INFO"  # TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL
 
@@ -18,6 +18,7 @@
 --cbor-capture-dir
 --cbor-capture-session
 --command-prefix
+--compaction-min-tokens
 --config
 --daemon
 --default-backend
@@ -51,6 +52,7 @@
 --enable-antigravity-backend-debugging-override
 --enable-brute-force-protection
 --enable-cline-backend-debugging-override
+--enable-context-compaction
 --enable-droid-path-fix
 --enable-edit-precision
 --enable-gemini-oauth-free-backend-debugging-override
 
@@ -175,6 +175,13 @@ Configuration is resolved in the following order (highest to lowest priority):
 | N/A | `STREAMING_SAMPLER_RATE` | Sampling rate (0.0 to 1.0). |
 | N/A | `STREAMING_SAMPLER_MAX_SAMPLES` | Max samples to retain. |
 
+### History Compaction
+
+| CLI Argument | Environment Variable | Description |
+| :--- | :--- | :--- |
+| `--enable-context-compaction` | `ENABLE_CONTEXT_COMPACTION=true` | Enable history compaction, which compacts stale tool outputs to reduce context size. (Default: Disabled) |
+| `--compaction-min-tokens N` | `COMPACTION_MIN_TOKENS` | Minimum estimated token count required to trigger compaction. (Default: 100,000) |
+
 ---
 
 ## Features
 
@@ -3883,9 +3883,27 @@ async def _handle_429_with_graceful_degradation(
                 retry_delay = (
                     self._extract_retry_delay(last_error) if last_error else None
                 )
+
+                # For empty responses without explicit retry delay, use a minimal cooldown
+                # (5s instead of default 10min) since empty responses indicate transient
+                # issues (content filtering, safety blocks) not quota exhaustion
+                if retry_delay is None and last_error:
+                    is_empty_response = (
+                        getattr(last_error, "code", None) == "empty_response"
+                    )
+                    if is_empty_response:
+                        retry_delay = 5.0  # Minimal cooldown for empty responses
+                        logger.info(
+                            "Model %s returned empty response, using minimal cooldown of %.1fs",
+                            model,
+                            retry_delay,
+                        )
+
                 self._set_cooldown(model, duration=retry_delay)
 
-                if retry_delay:
+                if retry_delay and not (
+                    last_error and getattr(last_error, "code", None) == "empty_response"
+                ):
                     logger.info(
                         "Model %s put in cooldown for %.1fs based on API response",
                         model,
@@ -3905,15 +3923,33 @@ async def _handle_429_with_graceful_degradation(
                 retry_delay = (
                     self._extract_retry_delay(last_error) if last_error else None
                 )
+
+                # For empty responses without explicit retry delay, use a minimal cooldown
+                if retry_delay is None and last_error:
+                    is_empty_response = (
+                        getattr(last_error, "code", None) == "empty_response"
+                    )
+                    if is_empty_response:
+                        retry_delay = 5.0  # Minimal cooldown for empty responses
+                        logger.info(
+                            "Fallback model %s returned empty response, using minimal cooldown of %.1fs",
+                            model,
+                            retry_delay,
+                        )
+
                 self._set_cooldown(model, duration=retry_delay)
 
-                if retry_delay:
+                if retry_delay and not (
+                    last_error and getattr(last_error, "code", None) == "empty_response"
+                ):
                     logger.info(
                         "Fallback model %s put in cooldown for %.1fs based on API response",
                         model,
                         retry_delay,
                     )
-                else:
+                elif not (
+                    last_error and getattr(last_error, "code", None) == "empty_response"
+                ):
                     logger.info("Fallback model %s exhausted, put in cooldown", model)
 
         # If we get here, all requested models failed
 
@@ -419,6 +419,25 @@ def validate_model_alias(value: str) -> tuple[str, str]:
         help="Enable file access sandboxing to restrict file operations to the project directory (env: ENABLE_SANDBOXING)",
     )
 
+    # History Compaction options
+    compaction_group = parser.add_argument_group(
+        "History Compaction", "Options for tool output compaction"
+    )
+    compaction_group.add_argument(
+        "--enable-context-compaction",
+        dest="enable_context_compaction",
+        action="store_true",
+        default=None,
+        help="Enable history compaction for stale tool outputs (overrides config)",
+    )
+    compaction_group.add_argument(
+        "--compaction-min-tokens",
+        dest="compaction_min_tokens",
+        type=int,
+        metavar="TOKENS",
+        help="Minimum token estimate to trigger compaction (default: 100,000)",
+    )
+
     # Planning phase options
     parser.add_argument(
         "--enable-planning-phase",
@@ -1349,6 +1368,27 @@ def record_cli(path: str, value: Any, flag: str) -> None:
         ]
         os.environ["MODEL_ALIASES"] = json.dumps(alias_data)
 
+    # Compaction configuration
+    compaction_overrides: dict[str, Any] = {}
+    if getattr(args, "enable_context_compaction", None) is not None:
+        compaction_overrides["enabled"] = args.enable_context_compaction
+        record_cli(
+            "compaction.enabled",
+            args.enable_context_compaction,
+            "--enable-context-compaction",
+        )
+
+    if getattr(args, "compaction_min_tokens", None) is not None:
+        compaction_overrides["token_threshold"] = args.compaction_min_tokens
+        record_cli(
+            "compaction.token_threshold",
+            args.compaction_min_tokens,
+            "--compaction-min-tokens",
+        )
+
+    if compaction_overrides:
+        cli_overrides["compaction"] = compaction_overrides
+
     # API keys and URLs
     if args.openrouter_api_key is not None:
         normalized_key = _normalize_api_key_value(args.openrouter_api_key)
 
@@ -41,6 +41,7 @@ def get_openrouter_headers(cfg: dict[str, str], api_key: str) -> dict[str, str]:
 
 from src.core.domain.configuration.app_identity_config import AppIdentityConfig
 from src.core.domain.configuration.assessment_config import AssessmentConfig
+from src.core.domain.configuration.compaction_config import CompactionConfig
 from src.core.domain.configuration.header_config import (
     HeaderConfig,
     HeaderOverrideMode,
@@ -1228,6 +1229,9 @@ class AppConfig(DomainModel, IConfig):
     # Routing settings
     routing: RoutingConfig = Field(default_factory=RoutingConfig)
 
+    # Compaction settings
+    compaction: CompactionConfig = Field(default_factory=CompactionConfig)
+
     # ProxyMem - cross-session memory layer settings
     memory: MemoryConfiguration = Field(default_factory=MemoryConfiguration)
 
@@ -2176,6 +2180,24 @@ def from_env(
             ),
         }
 
+        # Compaction configuration
+        config["compaction"] = {
+            "enabled": _env_to_bool(
+                "ENABLE_CONTEXT_COMPACTION",
+                False,
+                env,
+                path="compaction.enabled",
+                resolution=resolution,
+            ),
+            "token_threshold": _env_to_int(
+                "COMPACTION_MIN_TOKENS",
+                100_000,
+                env,
+                path="compaction.token_threshold",
+                resolution=resolution,
+            ),
+        }
+
         config["identity"] = AppIdentityConfig(
             title=HeaderConfig(
                 override_value=_get_env_value(
 
@@ -47,6 +47,10 @@
 from src.core.interfaces.command_service_interface import ICommandService
 from src.core.interfaces.configuration_interface import IConfig
 from src.core.interfaces.di_interface import IServiceProvider
+from src.core.interfaces.failure_strategy_interface import (
+    FailureHandlingConfig,
+    IFailureHandlingStrategy,
+)
 from src.core.interfaces.loop_detector_interface import ILoopDetector
 from src.core.interfaces.memory_service_interface import IMemoryService
 
@@ -1216,12 +1220,14 @@ def _backend_request_manager_factory(
         wire_capture = provider.get_required_service(IWireCapture)  # type: ignore[type-abstract]
         # Optional: history compaction service for context compaction feature
         history_compaction_service = provider.get_service(HistoryCompactionService)
+        config = provider.get_required_service(AppConfig)
         return BackendRequestManager(
             backend_processor,
             response_processor,
             angel_service_factory,
             wire_capture,
             history_compaction_service=history_compaction_service,
+            config=config,
         )
 
     _add_singleton(
@@ -2432,6 +2438,37 @@ def _resilience_coordinator_factory(
         ResilienceCoordinator, implementation_factory=_resilience_coordinator_factory
     )
 
+    # Register failure handling strategy
+    def _failure_handling_strategy_factory(
+        provider: IServiceProvider,
+    ) -> IFailureHandlingStrategy:
+        from src.core.services.backend_routing_service import BackendRoutingService
+        from src.core.services.failure_handling_strategy import (
+            DefaultFailureHandlingStrategy,
+        )
+
+        # Get routing service for backend discovery
+        routing_service = provider.get_service(BackendRoutingService)
+
+        # Create default configuration
+        config = FailureHandlingConfig(
+            max_silent_wait=30.0,
+            total_timeout_budget=90.0,
+            keepalive_interval=8.0,
+            max_failover_hops=5,
+            min_retry_wait=1.0,
+        )
+
+        return DefaultFailureHandlingStrategy(
+            config=config,
+            backend_discovery=routing_service,
+        )
+
+    _add_singleton(
+        cast(type, IFailureHandlingStrategy),
+        implementation_factory=_failure_handling_strategy_factory,
+    )
+
     # Register backend service
     def _backend_service_factory(provider: IServiceProvider) -> BackendService:
         # Import required modules
@@ -2531,6 +2568,11 @@ def _backend_service_factory(provider: IServiceProvider) -> BackendService:
         # Get or create resilience coordinator
         resilience_coordinator = provider.get_service(ResilienceCoordinator)
 
+        # Get or create failure handling strategy
+        failure_handling_strategy = provider.get_service(
+            cast(type, IFailureHandlingStrategy)
+        )
+
         # Return backend service
         return BackendService(
             backend_factory,
@@ -2544,6 +2586,7 @@ def _backend_service_factory(provider: IServiceProvider) -> BackendService:
             wire_capture=provider.get_required_service(IWireCapture),  # type: ignore[type-abstract]
             routing_service=routing_service,
             resilience_coordinator=resilience_coordinator,
+            failure_handling_strategy=failure_handling_strategy,
         )
 
     # Register backend service and bind to interface
 
@@ -34,7 +34,7 @@ class CompactionConfig:
         stub_template: Template for generating stub messages
     """
 
-    enabled: bool = True
+    enabled: bool = False
     token_threshold: int = 100_000  # Start compacting above this estimate
     max_tokens: int = 150_000  # Warn if cannot reduce below this
 
@@ -90,7 +90,7 @@ def from_dict(cls, data: dict[str, Any]) -> "CompactionConfig":
             CompactionConfig instance
         """
         return cls(
-            enabled=data.get("enabled", True),
+            enabled=data.get("enabled", False),
             token_threshold=data.get("token_threshold", 100_000),
             max_tokens=data.get("max_tokens", 150_000),
             allowed_tool_categories=data.get("allowed_tool_categories", []),
@@ -123,7 +123,7 @@ def default(cls) -> "CompactionConfig":
             CompactionConfig with sensible defaults
         """
         return cls(
-            enabled=True,
+            enabled=False,
             token_threshold=100_000,
             max_tokens=150_000,
             allowed_tool_categories=[