Skip to content

Commit f3cb61c

Browse files
Mateusz and factory-droid[bot]
committed
feat: Enhance resilience, streaming keepalive, and redaction caching
- Centralize graceful degradation logic in BackendService and disable connector-level fallbacks
- Implement keepalive signals for streaming requests during retry waits
- Add RedactionCache to optimize secret scanning for conversation history
- Improve Gemini rate limit handling by extracting retry-after from Google RPC errors
- Add diagnostic script for Antigravity rate limits

Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
1 parent 65e3ef5 commit f3cb61c

18 files changed

+1493
-39
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
![Python](https://img.shields.io/badge/python-3.10%2B-blue)
77
[![License](https://img.shields.io/github/license/matdev83/llm-interactive-proxy?color=blue)](LICENSE)
88

9-
A swiss-army knife proxy for LLM-powered applications. Sits between any LLM-aware client and any backend, presenting multiple front-end APIs (OpenAI, Anthropic, Gemini) while routing to your chosen provider. Translate requests, override models, rotate API keys, prevent leaks, inspect traffic, and execute chat-embedded commands—all from a single drop-in gateway.
9+
A swiss-army knife proxy for LLM-powered applications. Sits between any LLM-aware client (agent) and any backend, presenting multiple front-end APIs (OpenAI, Responses API, Anthropic, Gemini) while routing to your chosen provider. Translate requests, override models, rotate API keys, prevent leaks, inspect traffic, and execute chat-embedded commands—all from a single drop-in gateway.
1010

1111
## Architecture
1212

docs/user_guide/features/health-checks.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,4 @@ health_check:
393393

394394

395395

396+
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Diagnostic script to analyze Antigravity backend request behavior.
4+
5+
This script helps diagnose why the proxy might be triggering rate limits
6+
when native Antigravity works fine.
7+
8+
Usage:
9+
./.venv/Scripts/python.exe scripts/diagnose_antigravity_rate_limit.py
10+
"""
11+
12+
import asyncio
13+
import json
14+
import logging
15+
import os
16+
import sys
17+
from pathlib import Path
18+
19+
# Add src to path
20+
sys.path.insert(0, str(Path(__file__).parent.parent))
21+
22+
from src.connectors.gemini_oauth_antigravity import GeminiOAuthAntigravityConnector
23+
from src.core.config.app_config import AppConfig
24+
from src.core.di.container import ServiceCollection
25+
from src.core.services.translation_service import TranslationService
26+
27+
# Root logger at DEBUG so connector internals are visible while diagnosing;
# the format includes logger name and line number to trace where each record
# originated.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d %(message)s",
)
logger = logging.getLogger(__name__)

# Suppress noisy loggers: httpx/httpcore emit per-request DEBUG chatter that
# would drown out the connector's own diagnostics.
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
36+
37+
38+
async def diagnose() -> None:
    """Run diagnostic checks on the Antigravity backend.

    Prints, in order: the connector's configuration, whether OAuth
    credentials load, the request-body builder and endpoint config types,
    the session headers, and a sample wrapped request body (plus its
    serialized size) — everything needed to compare the proxy's traffic
    against native Antigravity behavior when chasing rate-limit issues.

    Returns:
        None. All output goes to stdout; aborts early if credentials
        fail to load, since nothing downstream is meaningful without them.
    """
    # Imported lazily so the module can be inspected without httpx installed.
    import httpx

    print("=" * 60)
    print("Antigravity Backend Diagnostic")
    print("=" * 60)

    # Create connector; the async client is closed automatically on exit.
    async with httpx.AsyncClient(timeout=60.0) as client:
        config = AppConfig()

        # Setup a minimal DI container so the connector gets a real
        # TranslationService instance, matching production wiring.
        services = ServiceCollection()
        services.add_singleton(TranslationService)
        provider = services.build_service_provider()
        translation_service = provider.get_required_service(TranslationService)

        connector = GeminiOAuthAntigravityConnector(
            client=client,
            config=config,
            translation_service=translation_service,
        )

        # Check configuration (reads connector private state deliberately —
        # this is a white-box diagnostic script).
        print("\n--- Configuration ---")
        print(f"Backend type: {connector.backend_type}")
        print(f"API base URL: {connector.gemini_api_base_url}")
        print(f"Graceful degradation enabled: {connector._degradation_config.enabled}")
        print(f"Recovery probing enabled: {connector._degradation_config.enable_recovery_probing}")

        # Check credentials; failure here means no request can succeed,
        # so we report the error and bail out early.
        print("\n--- Credentials ---")
        try:
            await connector.initialize()
            print(f"Credentials loaded: {bool(connector._oauth_credentials)}")
            if connector._oauth_credentials:
                print(f"Has access token: {'access_token' in connector._oauth_credentials}")
                print(f"Has project_id: {'project_id' in connector._oauth_credentials}")
        except Exception as e:
            print(f"Error loading credentials: {e}")
            return

        # Check request body builder
        print("\n--- Request Body Builder ---")
        print(f"Type: {type(connector._request_body_builder).__name__}")

        # Check endpoint config
        print("\n--- Endpoint Config ---")
        print(f"Type: {type(connector._endpoint_config).__name__}")

        # Check session headers — differences from the native client's
        # headers are a prime rate-limit suspect.
        print("\n--- Session Headers ---")
        headers = connector._get_session_headers()
        for k, v in headers.items():
            print(f"  {k}: {v}")

        # Build a sample request body to see the structure
        print("\n--- Sample Request Body Structure ---")
        from src.core.domain.chat import CanonicalChatRequest, ChatMessage

        sample_request = CanonicalChatRequest(
            model="gemini-3-pro-high",
            messages=[ChatMessage(role="user", content="Hello, test message")],
            stream=True,
        )

        # Convert to inner request format (assumes the translation returns a
        # dict-shaped Gemini request — TODO confirm against TranslationService).
        gemini_request = translation_service.from_domain_to_gemini_request(sample_request)
        print(f"Gemini request keys: {list(gemini_request.keys())}")

        contents = gemini_request.get("contents", [])
        print(f"Contents count: {len(contents)}")
        if contents:
            total_chars = sum(
                len(part.get("text", ""))
                for content in contents
                for part in content.get("parts", [])
            )
            print(f"Total content chars: {total_chars}")

        # Build the wrapped request body exactly as the connector would.
        inner_request = {
            "contents": contents,
            "generationConfig": gemini_request.get("generationConfig", {}),
        }

        wrapped_body = connector._request_body_builder.build(
            effective_model="gemini-3-pro-high",
            project_id="test-project",
            request_data=sample_request,
            inner_request=inner_request,
        )

        print(f"Wrapped body keys: {list(wrapped_body.keys())}")

        # Calculate size of the compact (wire-format) serialization.
        body_json = json.dumps(wrapped_body)
        print(f"Request body size: {len(body_json)} bytes")

        # Show the structure (truncated). Serialize the pretty form once and
        # reuse it — the original serialized it twice.
        pretty_json = json.dumps(wrapped_body, indent=2)
        print("\n--- Request Body Preview (truncated) ---")
        print(pretty_json[:1000])
        if len(pretty_json) > 1000:
            print("... (truncated)")

        print("\n" + "=" * 60)
        print("Diagnostic complete")
        print("=" * 60)
148+
149+
150+
if __name__ == "__main__":
    # Script entry point: drive the async diagnostic to completion on a
    # fresh event loop.
    asyncio.run(diagnose())
152+

0 commit comments

Comments
 (0)