1414# A secret key used to authenticate requests to THIS proxy server.
1515# This can be any string. Your client application must send this key in the
1616# 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
17- PROXY_API_KEY="YOUR_PROXY_API_KEY"
17+ # PROXY_API_KEY="YOUR_PROXY_API_KEY"
1818
1919
2020# ------------------------------------------------------------------------------
@@ -30,40 +30,27 @@ PROXY_API_KEY="YOUR_PROXY_API_KEY"
3030#
3131
3232# --- Google Gemini ---
33- GEMINI_API_KEY_1 = " YOUR_GEMINI_API_KEY_1"
34- GEMINI_API_KEY_2 = " YOUR_GEMINI_API_KEY_2"
33+ # GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
34+ # GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
3535
3636# --- OpenAI / Azure OpenAI ---
3737# For Azure, ensure your key has access to the desired models.
38- OPENAI_API_KEY_1 = " YOUR_OPENAI_OR_AZURE_API_KEY"
38+ # OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
3939
4040# --- Anthropic (Claude) ---
41- ANTHROPIC_API_KEY_1 = " YOUR_ANTHROPIC_API_KEY"
41+ # ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
4242
4343# --- OpenRouter ---
44- OPENROUTER_API_KEY_1 = " YOUR_OPENROUTER_API_KEY"
45-
46- # --- Groq ---
47- GROQ_API_KEY_1 = " YOUR_GROQ_API_KEY"
44+ # OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
4845
4946# --- Mistral AI ---
50- MISTRAL_API_KEY_1 = " YOUR_MISTRAL_API_KEY"
47+ # MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
5148
5249# --- NVIDIA NIM ---
53- NVIDIA_API_KEY_1 = " YOUR_NVIDIA_API_KEY"
54-
55- # --- Co:here ---
56- COHERE_API_KEY_1 = " YOUR_COHERE_API_KEY"
57-
58- # --- AWS Bedrock ---
59- # Note: Bedrock authentication is typically handled via AWS IAM roles or
60- # environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
61- # Only set this if you are using a specific API key for Bedrock.
62- BEDROCK_API_KEY_1 = " "
50+ # NVIDIA_NIM_API_KEY_1="YOUR_NVIDIA_API_KEY"
6351
6452# --- Chutes ---
65- CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
66-
53+ # CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
6754
6855# ------------------------------------------------------------------------------
6956# | [OAUTH] Provider OAuth 2.0 Credentials |
@@ -88,15 +75,15 @@ CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
8875# --- Google Gemini (gcloud CLI) ---
8976# Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
9077# or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
91- GEMINI_CLI_OAUTH_1 = " "
78+ # GEMINI_CLI_OAUTH_1=""
9279
9380# --- Qwen / Dashscope (Code Companion) ---
9481# Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
95- QWEN_CODE_OAUTH_1 = " "
82+ # QWEN_CODE_OAUTH_1=""
9683
9784# --- iFlow ---
9885# Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
99- IFLOW_OAUTH_1=""
86+ # IFLOW_OAUTH_1=""
10087
10188
10289# ------------------------------------------------------------------------------
@@ -106,7 +93,7 @@ IFLOW_OAUTH_1=""
10693# --- Gemini CLI Project ID ---
10794# Required if you are using the Gemini CLI OAuth provider and the proxy
10895# cannot automatically determine your Google Cloud Project ID.
109- GEMINI_CLI_PROJECT_ID=""
96+ # GEMINI_CLI_PROJECT_ID=""
11097
11198# --- Model Ignore Lists ---
11299# Specify a comma-separated list of model names to exclude from a provider's
@@ -117,8 +104,8 @@ GEMINI_CLI_PROJECT_ID=""
117104# Example:
118105# IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
119106# IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
120- IGNORE_MODELS_GEMINI=""
121- IGNORE_MODELS_OPENAI=""
107+ # IGNORE_MODELS_GEMINI=""
108+ # IGNORE_MODELS_OPENAI=""
122109
123110# --- Model Whitelists (Overrides Blacklists) ---
124111# Specify a comma-separated list of model names to ALWAYS include from a
@@ -140,8 +127,8 @@ IGNORE_MODELS_OPENAI=""
140127# Example of a pure whitelist for Gemini:
141128# IGNORE_MODELS_GEMINI="*"
142129# WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
143- WHITELIST_MODELS_GEMINI=""
144- WHITELIST_MODELS_OPENAI=""
130+ # WHITELIST_MODELS_GEMINI=""
131+ # WHITELIST_MODELS_OPENAI=""
145132
146133# --- Maximum Concurrent Requests Per Key ---
147134# Controls how many concurrent requests for the SAME model can use the SAME key.
@@ -154,10 +141,10 @@ WHITELIST_MODELS_OPENAI=""
154141# MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3 # Allow 3 concurrent requests per OpenAI key
155142# MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1 # Allow only 1 concurrent request per Gemini key (default)
156143#
157- MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
158- MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
159- MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
160- MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
144+ # MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
145+ # MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
146+ # MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
147+ # MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
161148
162149# --- Credential Rotation Mode ---
163150# Controls how credentials are rotated when multiple are available for a provider.
@@ -236,18 +223,199 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
236223# QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
237224# QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
238225
226+ # ------------------------------------------------------------------------------
227+ # | [ADVANCED] Fair Cycle Rotation |
228+ # ------------------------------------------------------------------------------
229+ #
230+ # Ensures every credential has been exhausted at least once before any of them
231+ # is reused, so that no single credential is used repeatedly while others sit idle.
232+ #
233+ # Provider Defaults (see src/rotator_library/config/defaults.py):
234+ # - Enabled: only in sequential rotation mode (disabled in balanced mode)
235+ # - Tracking Mode: model_group (track per quota group)
236+ # - Cross-Tier: false (each priority tier cycles independently)
237+ # - Cycle Duration: 86400 seconds (24 hours)
238+ # - Exhaustion Threshold: 300 seconds (5 minutes)
239+ #
240+ # Format: FAIR_CYCLE_{PROVIDER}=true/false
241+ # Example:
242+ # FAIR_CYCLE_ANTIGRAVITY=true
243+ # FAIR_CYCLE_GEMINI_CLI=false
244+
245+ # Tracking mode: "model_group" (per quota group) or "credential" (global per key)
246+ # FAIR_CYCLE_TRACKING_MODE_ANTIGRAVITY=model_group
247+
248+ # Cross-tier: true = ALL credentials must exhaust regardless of tier
249+ # FAIR_CYCLE_CROSS_TIER_ANTIGRAVITY=false
250+
251+ # Cycle duration in seconds
252+ # FAIR_CYCLE_DURATION_ANTIGRAVITY=86400
253+
254+ # Exhaustion threshold in seconds - a cooldown must exceed this for the credential to count as "exhausted"
255+ # EXHAUSTION_COOLDOWN_THRESHOLD_ANTIGRAVITY=300
256+ # EXHAUSTION_COOLDOWN_THRESHOLD=300 # Global fallback for all providers
257+
258+ # ------------------------------------------------------------------------------
259+ # | [ADVANCED] Custom Caps |
260+ # ------------------------------------------------------------------------------
261+ #
262+ # Set custom usage limits, per tier and per model/group, that are MORE restrictive
263+ # than the actual API limits. When a cap is reached, the credential goes on cooldown
264+ # BEFORE it hits the actual API limit.
265+ #
266+ # Cap values: absolute number (100) or percentage ("80%")
267+ # Cooldown modes: quota_reset | offset:<seconds> | fixed:<seconds>
268+ #
269+ # Format: CUSTOM_CAP_{PROVIDER}_T{TIER}_{MODEL_OR_GROUP}=<value>
270+ # Format: CUSTOM_CAP_COOLDOWN_{PROVIDER}_T{TIER}_{MODEL_OR_GROUP}=<mode>[:<seconds>]
271+ #
272+ # Name transformations for env vars:
273+ # - Dashes (-) -> Underscores (_)
274+ # - Dots (.) -> Underscores (_)
275+ # - All UPPERCASE
276+ # Example: claude-opus-4.5 -> CLAUDE_OPUS_4_5
277+ #
278+ # Tier syntax:
279+ # - Single tier: T2 (tier 2)
280+ # - Multi-tier: T2_3 (tiers 2 and 3 share config)
281+ # - Default: TDEFAULT (fallback for unlisted tiers)
282+ #
283+ # Examples:
284+ # CUSTOM_CAP_ANTIGRAVITY_T2_CLAUDE=100
285+ # CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T2_CLAUDE=quota_reset
286+ #
287+ # CUSTOM_CAP_ANTIGRAVITY_T3_CLAUDE=30
288+ # CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T3_CLAUDE=offset:3600
289+ #
290+ # CUSTOM_CAP_ANTIGRAVITY_TDEFAULT_CLAUDE=80%
291+ #
292+ # CUSTOM_CAP_ANTIGRAVITY_T2_3_G25_FLASH=80%
293+ # CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T2_3_G25_FLASH=offset:1800
294+
239295# ------------------------------------------------------------------------------
240296# | [ADVANCED] Proxy Configuration |
241297# ------------------------------------------------------------------------------
242298
243299# --- OAuth Refresh Interval ---
244300# How often, in seconds, the background refresher should check and refresh
245301# expired OAuth tokens.
246- OAUTH_REFRESH_INTERVAL = 600 # Default is 600 seconds (10 minutes)
302+ # Default: 600 (10 minutes)
303+ # OAUTH_REFRESH_INTERVAL=600
247304
248305# --- Skip OAuth Initialization ---
249306# Set to "true" to prevent the proxy from performing the interactive OAuth
250307# setup/validation flow on startup. This is highly recommended for non-interactive
251308# environments like Docker containers or automated scripts.
252309# Ensure your credentials in 'oauth_creds/' are valid before enabling this.
253- SKIP_OAUTH_INIT_CHECK = false
310+ # SKIP_OAUTH_INIT_CHECK=false
311+
312+ # --- Global Request Timeout ---
313+ # Maximum time (in seconds) a request can wait for an available credential.
314+ # If all credentials are on cooldown and none will become available within
315+ # this timeout, the request fails fast with a clear error message.
316+ # Increase this value if you have limited credentials and want to wait
317+ # longer for capacity (e.g., when credentials hit rate limits).
318+ # Default: 30 seconds
319+ # GLOBAL_TIMEOUT=30
320+
321+ # ------------------------------------------------------------------------------
322+ # | [ADVANCED] HTTP Timeout Configuration |
323+ # ------------------------------------------------------------------------------
324+ #
325+ # Controls timeouts for HTTP requests to provider APIs.
326+ # All values are in seconds.
327+ #
328+
329+ # Connection establishment timeout (default: 30)
330+ # TIMEOUT_CONNECT=30
331+
332+ # Request body send timeout (default: 30)
333+ # TIMEOUT_WRITE=30
334+
335+ # Connection pool acquisition timeout (default: 60)
336+ # TIMEOUT_POOL=60
337+
338+ # Read timeout between streaming chunks (default: 300 = 5 minutes)
339+ # If no data arrives for this duration, the connection is considered stalled.
340+ # TIMEOUT_READ_STREAMING=300
341+
342+ # Read timeout for non-streaming responses (default: 600 = 10 minutes)
343+ # Some LLM responses take significant time to generate.
344+ # TIMEOUT_READ_NON_STREAMING=600
345+
346+ # ------------------------------------------------------------------------------
347+ # | [ADVANCED] Antigravity Provider Configuration |
348+ # ------------------------------------------------------------------------------
349+ #
350+ # Configuration for the Antigravity (Google Code Assist) provider.
351+ # These settings control retry behavior and prompt handling.
352+ #
353+
354+ # --- Empty Response Handling ---
355+ # When Antigravity returns an empty response (no content, no tool calls),
356+ # the proxy will automatically retry, up to this many attempts.
357+ # Default: 6 attempts
358+ # ANTIGRAVITY_EMPTY_RESPONSE_ATTEMPTS=6
359+
360+ # Delay in seconds between empty response retries.
361+ # Default: 3 seconds
362+ # ANTIGRAVITY_EMPTY_RESPONSE_RETRY_DELAY=3
363+
364+ # --- Malformed Function Call Handling ---
365+ # When Gemini 3 returns MALFORMED_FUNCTION_CALL (invalid JSON syntax),
366+ # the proxy injects corrective messages and retries.
367+ # Default: 2 retries
368+ # ANTIGRAVITY_MALFORMED_CALL_RETRIES=2
369+
370+ # Delay in seconds between malformed call retries.
371+ # Default: 1 second
372+ # ANTIGRAVITY_MALFORMED_CALL_DELAY=1
373+
374+ # --- System Instruction Configuration ---
375+ # When true, prepend the Antigravity agent system instruction.
376+ # Default: true
377+ # ANTIGRAVITY_PREPEND_INSTRUCTION=true
378+
379+ # When true, inject an identity override instruction after the Antigravity prompt.
380+ # This tells the model to disregard the Antigravity identity.
381+ # Default: true
382+ # ANTIGRAVITY_INJECT_IDENTITY_OVERRIDE=true
383+
384+ # When true, use shortened versions of prompts to reduce context bloat.
385+ # Default: true
386+ # ANTIGRAVITY_USE_SHORT_PROMPTS=true
387+
388+ # ------------------------------------------------------------------------------
389+ # | [ADVANCED] Gemini CLI Provider Configuration |
390+ # ------------------------------------------------------------------------------
391+ #
392+ # Configuration for the Gemini CLI (Google Code Assist) provider.
393+ #
394+
395+ # OAuth callback port for interactive re-authentication.
396+ # Default: 8085
397+ # GEMINI_CLI_OAUTH_PORT=8085
398+
399+ # ------------------------------------------------------------------------------
400+ # | [ADVANCED] Antigravity OAuth Configuration |
401+ # ------------------------------------------------------------------------------
402+ #
403+ # OAuth callback port for Antigravity interactive re-authentication.
404+ # Default: 8085 (same as Gemini CLI, shared)
405+ # ANTIGRAVITY_OAUTH_PORT=8085
406+
407+ # ------------------------------------------------------------------------------
408+ # | [ADVANCED] Debugging / Logging |
409+ # ------------------------------------------------------------------------------
410+
411+ # --- LiteLLM Pydantic Warning Suppression ---
412+ # LiteLLM produces harmless Pydantic serialization warnings during streaming
413+ # due to a known issue with response types (Message, StreamingChoices) having
414+ # mismatched field counts. These warnings don't affect functionality.
415+ # See: https://github.com/BerriAI/litellm/issues/11759
416+ #
417+ # NOTE: This is a workaround. Remove once litellm patches the issue above.
418+ #
419+ # Set to "0" to show these warnings (useful for debugging).
420+ # Default: "1" (suppress warnings)
421+ # SUPPRESS_LITELLM_SERIALIZATION_WARNINGS=1
0 commit comments