Skip to content

Commit 61674d1

Browse files
author
Mateusz
committed
feat: Add history compaction and failure handling strategy
1 parent 7ca97bc commit 61674d1

20 files changed

+3198
-1419
lines changed

config/config.example.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ session:
106106
force_reprocess_tool_calls: false
107107
log_skipped_tool_calls: false
108108

109+
# History Compaction
110+
# Reduces context size by compacting stale tool outputs in long conversations.
111+
# This helps maintain performance and stay within context limits.
112+
compaction:
113+
enabled: false # Default: false. Set to true to enable.
114+
token_threshold: 100000 # Minimum tokens required to trigger compaction. Default: 100,000.
115+
109116
# Logging
110117
logging:
111118
level: "INFO" # TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL

data/cli_flag_snapshot.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
--cbor-capture-dir
1919
--cbor-capture-session
2020
--command-prefix
21+
--compaction-min-tokens
2122
--config
2223
--daemon
2324
--default-backend
@@ -51,6 +52,7 @@
5152
--enable-antigravity-backend-debugging-override
5253
--enable-brute-force-protection
5354
--enable-cline-backend-debugging-override
55+
--enable-context-compaction
5456
--enable-droid-path-fix
5557
--enable-edit-precision
5658
--enable-gemini-oauth-free-backend-debugging-override

docs/user_guide/cli-parameters.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ Configuration is resolved in the following order (highest to lowest priority):
175175
| N/A | `STREAMING_SAMPLER_RATE` | Sampling rate (0.0 to 1.0). |
176176
| N/A | `STREAMING_SAMPLER_MAX_SAMPLES` | Max samples to retain. |
177177

178+
### History Compaction
179+
180+
| CLI Argument | Environment Variable | Description |
181+
| :--- | :--- | :--- |
182+
| `--enable-context-compaction` | `ENABLE_CONTEXT_COMPACTION=true` | Enable history compaction, which reduces context size by compacting stale tool outputs. (Default: Disabled) |
183+
| `--compaction-min-tokens N` | `COMPACTION_MIN_TOKENS` | Minimum estimated token count required to trigger compaction. (Default: 100,000) |
184+
178185
---
179186

180187
## Features

src/connectors/gemini_base/connector.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3883,9 +3883,27 @@ async def _handle_429_with_graceful_degradation(
38833883
retry_delay = (
38843884
self._extract_retry_delay(last_error) if last_error else None
38853885
)
3886+
3887+
# For empty responses without explicit retry delay, use a minimal cooldown
3888+
# (5s instead of default 10min) since empty responses indicate transient
3889+
# issues (content filtering, safety blocks) not quota exhaustion
3890+
if retry_delay is None and last_error:
3891+
is_empty_response = (
3892+
getattr(last_error, "code", None) == "empty_response"
3893+
)
3894+
if is_empty_response:
3895+
retry_delay = 5.0 # Minimal cooldown for empty responses
3896+
logger.info(
3897+
"Model %s returned empty response, using minimal cooldown of %.1fs",
3898+
model,
3899+
retry_delay,
3900+
)
3901+
38863902
self._set_cooldown(model, duration=retry_delay)
38873903

3888-
if retry_delay:
3904+
if retry_delay and not (
3905+
last_error and getattr(last_error, "code", None) == "empty_response"
3906+
):
38893907
logger.info(
38903908
"Model %s put in cooldown for %.1fs based on API response",
38913909
model,
@@ -3905,15 +3923,33 @@ async def _handle_429_with_graceful_degradation(
39053923
retry_delay = (
39063924
self._extract_retry_delay(last_error) if last_error else None
39073925
)
3926+
3927+
# For empty responses without explicit retry delay, use a minimal cooldown
3928+
if retry_delay is None and last_error:
3929+
is_empty_response = (
3930+
getattr(last_error, "code", None) == "empty_response"
3931+
)
3932+
if is_empty_response:
3933+
retry_delay = 5.0 # Minimal cooldown for empty responses
3934+
logger.info(
3935+
"Fallback model %s returned empty response, using minimal cooldown of %.1fs",
3936+
model,
3937+
retry_delay,
3938+
)
3939+
39083940
self._set_cooldown(model, duration=retry_delay)
39093941

3910-
if retry_delay:
3942+
if retry_delay and not (
3943+
last_error and getattr(last_error, "code", None) == "empty_response"
3944+
):
39113945
logger.info(
39123946
"Fallback model %s put in cooldown for %.1fs based on API response",
39133947
model,
39143948
retry_delay,
39153949
)
3916-
else:
3950+
elif not (
3951+
last_error and getattr(last_error, "code", None) == "empty_response"
3952+
):
39173953
logger.info("Fallback model %s exhausted, put in cooldown", model)
39183954

39193955
# If we get here, all requested models failed

src/core/cli.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,25 @@ def validate_model_alias(value: str) -> tuple[str, str]:
419419
help="Enable file access sandboxing to restrict file operations to the project directory (env: ENABLE_SANDBOXING)",
420420
)
421421

422+
# History Compaction options
423+
compaction_group = parser.add_argument_group(
424+
"History Compaction", "Options for tool output compaction"
425+
)
426+
compaction_group.add_argument(
427+
"--enable-context-compaction",
428+
dest="enable_context_compaction",
429+
action="store_true",
430+
default=None,
431+
help="Enable history compaction for stale tool outputs (overrides config)",
432+
)
433+
compaction_group.add_argument(
434+
"--compaction-min-tokens",
435+
dest="compaction_min_tokens",
436+
type=int,
437+
metavar="TOKENS",
438+
help="Minimum token estimate to trigger compaction (default: 100,000)",
439+
)
440+
422441
# Planning phase options
423442
parser.add_argument(
424443
"--enable-planning-phase",
@@ -1349,6 +1368,27 @@ def record_cli(path: str, value: Any, flag: str) -> None:
13491368
]
13501369
os.environ["MODEL_ALIASES"] = json.dumps(alias_data)
13511370

1371+
# Compaction configuration
1372+
compaction_overrides: dict[str, Any] = {}
1373+
if getattr(args, "enable_context_compaction", None) is not None:
1374+
compaction_overrides["enabled"] = args.enable_context_compaction
1375+
record_cli(
1376+
"compaction.enabled",
1377+
args.enable_context_compaction,
1378+
"--enable-context-compaction",
1379+
)
1380+
1381+
if getattr(args, "compaction_min_tokens", None) is not None:
1382+
compaction_overrides["token_threshold"] = args.compaction_min_tokens
1383+
record_cli(
1384+
"compaction.token_threshold",
1385+
args.compaction_min_tokens,
1386+
"--compaction-min-tokens",
1387+
)
1388+
1389+
if compaction_overrides:
1390+
cli_overrides["compaction"] = compaction_overrides
1391+
13521392
# API keys and URLs
13531393
if args.openrouter_api_key is not None:
13541394
normalized_key = _normalize_api_key_value(args.openrouter_api_key)

src/core/config/app_config.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def get_openrouter_headers(cfg: dict[str, str], api_key: str) -> dict[str, str]:
4141

4242
from src.core.domain.configuration.app_identity_config import AppIdentityConfig
4343
from src.core.domain.configuration.assessment_config import AssessmentConfig
44+
from src.core.domain.configuration.compaction_config import CompactionConfig
4445
from src.core.domain.configuration.header_config import (
4546
HeaderConfig,
4647
HeaderOverrideMode,
@@ -1228,6 +1229,9 @@ class AppConfig(DomainModel, IConfig):
12281229
# Routing settings
12291230
routing: RoutingConfig = Field(default_factory=RoutingConfig)
12301231

1232+
# Compaction settings
1233+
compaction: CompactionConfig = Field(default_factory=CompactionConfig)
1234+
12311235
# ProxyMem - cross-session memory layer settings
12321236
memory: MemoryConfiguration = Field(default_factory=MemoryConfiguration)
12331237

@@ -2176,6 +2180,24 @@ def from_env(
21762180
),
21772181
}
21782182

2183+
# Compaction configuration
2184+
config["compaction"] = {
2185+
"enabled": _env_to_bool(
2186+
"ENABLE_CONTEXT_COMPACTION",
2187+
False,
2188+
env,
2189+
path="compaction.enabled",
2190+
resolution=resolution,
2191+
),
2192+
"token_threshold": _env_to_int(
2193+
"COMPACTION_MIN_TOKENS",
2194+
100_000,
2195+
env,
2196+
path="compaction.token_threshold",
2197+
resolution=resolution,
2198+
),
2199+
}
2200+
21792201
config["identity"] = AppIdentityConfig(
21802202
title=HeaderConfig(
21812203
override_value=_get_env_value(

src/core/di/services.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@
4747
from src.core.interfaces.command_service_interface import ICommandService
4848
from src.core.interfaces.configuration_interface import IConfig
4949
from src.core.interfaces.di_interface import IServiceProvider
50+
from src.core.interfaces.failure_strategy_interface import (
51+
FailureHandlingConfig,
52+
IFailureHandlingStrategy,
53+
)
5054
from src.core.interfaces.loop_detector_interface import ILoopDetector
5155
from src.core.interfaces.memory_service_interface import IMemoryService
5256

@@ -1216,12 +1220,14 @@ def _backend_request_manager_factory(
12161220
wire_capture = provider.get_required_service(IWireCapture) # type: ignore[type-abstract]
12171221
# Optional: history compaction service for context compaction feature
12181222
history_compaction_service = provider.get_service(HistoryCompactionService)
1223+
config = provider.get_required_service(AppConfig)
12191224
return BackendRequestManager(
12201225
backend_processor,
12211226
response_processor,
12221227
angel_service_factory,
12231228
wire_capture,
12241229
history_compaction_service=history_compaction_service,
1230+
config=config,
12251231
)
12261232

12271233
_add_singleton(
@@ -2432,6 +2438,37 @@ def _resilience_coordinator_factory(
24322438
ResilienceCoordinator, implementation_factory=_resilience_coordinator_factory
24332439
)
24342440

2441+
# Register failure handling strategy
2442+
def _failure_handling_strategy_factory(
2443+
provider: IServiceProvider,
2444+
) -> IFailureHandlingStrategy:
2445+
from src.core.services.backend_routing_service import BackendRoutingService
2446+
from src.core.services.failure_handling_strategy import (
2447+
DefaultFailureHandlingStrategy,
2448+
)
2449+
2450+
# Get routing service for backend discovery
2451+
routing_service = provider.get_service(BackendRoutingService)
2452+
2453+
# Create default configuration
2454+
config = FailureHandlingConfig(
2455+
max_silent_wait=30.0,
2456+
total_timeout_budget=90.0,
2457+
keepalive_interval=8.0,
2458+
max_failover_hops=5,
2459+
min_retry_wait=1.0,
2460+
)
2461+
2462+
return DefaultFailureHandlingStrategy(
2463+
config=config,
2464+
backend_discovery=routing_service,
2465+
)
2466+
2467+
_add_singleton(
2468+
cast(type, IFailureHandlingStrategy),
2469+
implementation_factory=_failure_handling_strategy_factory,
2470+
)
2471+
24352472
# Register backend service
24362473
def _backend_service_factory(provider: IServiceProvider) -> BackendService:
24372474
# Import required modules
@@ -2531,6 +2568,11 @@ def _backend_service_factory(provider: IServiceProvider) -> BackendService:
25312568
# Get or create resilience coordinator
25322569
resilience_coordinator = provider.get_service(ResilienceCoordinator)
25332570

2571+
# Get or create failure handling strategy
2572+
failure_handling_strategy = provider.get_service(
2573+
cast(type, IFailureHandlingStrategy)
2574+
)
2575+
25342576
# Return backend service
25352577
return BackendService(
25362578
backend_factory,
@@ -2544,6 +2586,7 @@ def _backend_service_factory(provider: IServiceProvider) -> BackendService:
25442586
wire_capture=provider.get_required_service(IWireCapture), # type: ignore[type-abstract]
25452587
routing_service=routing_service,
25462588
resilience_coordinator=resilience_coordinator,
2589+
failure_handling_strategy=failure_handling_strategy,
25472590
)
25482591

25492592
# Register backend service and bind to interface

src/core/domain/configuration/compaction_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class CompactionConfig:
3434
stub_template: Template for generating stub messages
3535
"""
3636

37-
enabled: bool = True
37+
enabled: bool = False
3838
token_threshold: int = 100_000 # Start compacting above this estimate
3939
max_tokens: int = 150_000 # Warn if cannot reduce below this
4040

@@ -90,7 +90,7 @@ def from_dict(cls, data: dict[str, Any]) -> "CompactionConfig":
9090
CompactionConfig instance
9191
"""
9292
return cls(
93-
enabled=data.get("enabled", True),
93+
enabled=data.get("enabled", False),
9494
token_threshold=data.get("token_threshold", 100_000),
9595
max_tokens=data.get("max_tokens", 150_000),
9696
allowed_tool_categories=data.get("allowed_tool_categories", []),
@@ -123,7 +123,7 @@ def default(cls) -> "CompactionConfig":
123123
CompactionConfig with sensible defaults
124124
"""
125125
return cls(
126-
enabled=True,
126+
enabled=False,
127127
token_threshold=100_000,
128128
max_tokens=150_000,
129129
allowed_tool_categories=[

0 commit comments

Comments
 (0)