Mirrowel
diff --git a/.dockerignore b/.dockerignore
Lines changed: 44 additions & 0 deletions
diff --git a/.env.example b/.env.example
Lines changed: 204 additions & 36 deletions
@@ -0,0 +1,44 @@
+# Git
+.git
+.gitignore
+
+# Python
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+.env
+.venv
+env/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Build
+*.egg-info/
+dist/
+build/
+.eggs/
+
+# Logs (will be mounted as a volume)
+logs/
+
+# OAuth credentials (will be mounted as a volume)
+oauth_creds/
+
+# Documentation
+*.md
+!README.md
+
+# GitHub
+.github/
+
+# Misc
+.DS_Store
+*.log
@@ -14,7 +14,7 @@
 # A secret key used to authenticate requests to THIS proxy server.
 # This can be any string. Your client application must send this key in the
 # 'Authorization' header as a Bearer token (e.g., "Authorization: Bearer YOUR_PROXY_API_KEY").
-PROXY_API_KEY="YOUR_PROXY_API_KEY"
+#PROXY_API_KEY="YOUR_PROXY_API_KEY"
 
 
 # ------------------------------------------------------------------------------
@@ -30,40 +30,27 @@ PROXY_API_KEY="YOUR_PROXY_API_KEY"
 #
 
 # --- Google Gemini ---
-GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
-GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
+#GEMINI_API_KEY_1="YOUR_GEMINI_API_KEY_1"
+#GEMINI_API_KEY_2="YOUR_GEMINI_API_KEY_2"
 
 # --- OpenAI / Azure OpenAI ---
 # For Azure, ensure your key has access to the desired models.
-OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
+#OPENAI_API_KEY_1="YOUR_OPENAI_OR_AZURE_API_KEY"
 
 # --- Anthropic (Claude) ---
-ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
+#ANTHROPIC_API_KEY_1="YOUR_ANTHROPIC_API_KEY"
 
 # --- OpenRouter ---
-OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
-
-# --- Groq ---
-GROQ_API_KEY_1="YOUR_GROQ_API_KEY"
+#OPENROUTER_API_KEY_1="YOUR_OPENROUTER_API_KEY"
 
 # --- Mistral AI ---
-MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
+#MISTRAL_API_KEY_1="YOUR_MISTRAL_API_KEY"
 
 # --- NVIDIA NIM ---
-NVIDIA_API_KEY_1="YOUR_NVIDIA_API_KEY"
-
-# --- Co:here ---
-COHERE_API_KEY_1="YOUR_COHERE_API_KEY"
-
-# --- AWS Bedrock ---
-# Note: Bedrock authentication is typically handled via AWS IAM roles or
-# environment variables like AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.
-# Only set this if you are using a specific API key for Bedrock.
-BEDROCK_API_KEY_1=""
+#NVIDIA_NIM_API_KEY_1="YOUR_NVIDIA_API_KEY"
 
 # --- Chutes ---
-CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
-
+#CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
 
 # ------------------------------------------------------------------------------
 # | [OAUTH] Provider OAuth 2.0 Credentials                                     |
@@ -88,15 +75,15 @@ CHUTES_API_KEY_1="YOUR_CHUTES_API_KEY"
 # --- Google Gemini (gcloud CLI) ---
 # Path to your gcloud ADC file (e.g., ~/.config/gcloud/application_default_credentials.json)
 # or a credential file from the official 'gemini' CLI (e.g., ~/.gemini/credentials.json).
-GEMINI_CLI_OAUTH_1=""
+#GEMINI_CLI_OAUTH_1=""
 
 # --- Qwen / Dashscope (Code Companion) ---
 # Path to your Qwen credential file (e.g., ~/.qwen/oauth_creds.json).
-QWEN_CODE_OAUTH_1=""
+#QWEN_CODE_OAUTH_1=""
 
 # --- iFlow ---
 # Path to your iFlow credential file (e.g., ~/.iflow/oauth_creds.json).
-IFLOW_OAUTH_1=""
+#IFLOW_OAUTH_1=""
 
 
 # ------------------------------------------------------------------------------
@@ -106,7 +93,7 @@ IFLOW_OAUTH_1=""
 # --- Gemini CLI Project ID ---
 # Required if you are using the Gemini CLI OAuth provider and the proxy
 # cannot automatically determine your Google Cloud Project ID.
-GEMINI_CLI_PROJECT_ID=""
+#GEMINI_CLI_PROJECT_ID=""
 
 # --- Model Ignore Lists ---
 # Specify a comma-separated list of model names to exclude from a provider's
@@ -117,8 +104,8 @@ GEMINI_CLI_PROJECT_ID=""
 # Example:
 # IGNORE_MODELS_GEMINI="gemini-1.0-pro-vision-latest,gemini-1.0-pro-latest"
 # IGNORE_MODELS_OPENAI="gpt-4-turbo,gpt-3.5-turbo-instruct"
-IGNORE_MODELS_GEMINI=""
-IGNORE_MODELS_OPENAI=""
+#IGNORE_MODELS_GEMINI=""
+#IGNORE_MODELS_OPENAI=""
 
 # --- Model Whitelists (Overrides Blacklists) ---
 # Specify a comma-separated list of model names to ALWAYS include from a
@@ -140,8 +127,8 @@ IGNORE_MODELS_OPENAI=""
 # Example of a pure whitelist for Gemini:
 # IGNORE_MODELS_GEMINI="*"
 # WHITELIST_MODELS_GEMINI="gemini-1.5-pro-latest,gemini-1.5-flash-latest"
-WHITELIST_MODELS_GEMINI=""
-WHITELIST_MODELS_OPENAI=""
+#WHITELIST_MODELS_GEMINI=""
+#WHITELIST_MODELS_OPENAI=""
 
 # --- Maximum Concurrent Requests Per Key ---
 # Controls how many concurrent requests for the SAME model can use the SAME key.
@@ -154,10 +141,10 @@ WHITELIST_MODELS_OPENAI=""
 # MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=3  # Allow 3 concurrent requests per OpenAI key
 # MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1  # Allow only 1 request per Gemini key (default)
 #
-MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
-MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
-MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
-MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
+#MAX_CONCURRENT_REQUESTS_PER_KEY_OPENAI=1
+#MAX_CONCURRENT_REQUESTS_PER_KEY_GEMINI=1
+#MAX_CONCURRENT_REQUESTS_PER_KEY_ANTHROPIC=1
+#MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
 
 # --- Credential Rotation Mode ---
 # Controls how credentials are rotated when multiple are available for a provider.
@@ -236,18 +223,199 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
 # QUOTA_GROUPS_ANTIGRAVITY_CLAUDE="claude-sonnet-4-5,claude-opus-4-5"
 # QUOTA_GROUPS_ANTIGRAVITY_GEMINI="gemini-3-pro-preview,gemini-3-pro-image-preview"
 
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Fair Cycle Rotation                                              |
+# ------------------------------------------------------------------------------
+#
+# Ensures every credential is exhausted at least once before any of them is reused.
+# Prevents one credential from being used repeatedly while others sit idle.
+#
+# Provider Defaults (see src/rotator_library/config/defaults.py):
+#   - Enabled: only in sequential rotation mode (disabled in balanced mode)
+#   - Tracking Mode: model_group (track per quota group)
+#   - Cross-Tier: false (each priority tier cycles independently)
+#   - Cycle Duration: 86400 seconds (24 hours)
+#   - Exhaustion Threshold: 300 seconds (5 minutes)
+#
+# Format: FAIR_CYCLE_{PROVIDER}=true/false
+# Example:
+# FAIR_CYCLE_ANTIGRAVITY=true
+# FAIR_CYCLE_GEMINI_CLI=false
+
+# Tracking mode: "model_group" (per quota group) or "credential" (global per key)
+# FAIR_CYCLE_TRACKING_MODE_ANTIGRAVITY=model_group
+
+# Cross-tier: true = ALL credentials must be exhausted, regardless of tier
+# FAIR_CYCLE_CROSS_TIER_ANTIGRAVITY=false
+
+# Cycle duration in seconds
+# FAIR_CYCLE_DURATION_ANTIGRAVITY=86400
+
+# Exhaustion threshold - cooldown must exceed this to count as "exhausted"
+# EXHAUSTION_COOLDOWN_THRESHOLD_ANTIGRAVITY=300
+# EXHAUSTION_COOLDOWN_THRESHOLD=300  # Global fallback for all providers
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Custom Caps                                                      |
+# ------------------------------------------------------------------------------
+#
+# Set custom usage limits, per tier and per model/group, that are MORE restrictive
+# than the actual API limits. When a cap is reached, the credential goes on cooldown
+# BEFORE it hits the actual API limit.
+#
+# Cap values: absolute number (100) or percentage ("80%")
+# Cooldown modes: quota_reset | offset:<seconds> | fixed:<seconds>
+#
+# Format: CUSTOM_CAP_{PROVIDER}_T{TIER}_{MODEL_OR_GROUP}=<value>
+# Format: CUSTOM_CAP_COOLDOWN_{PROVIDER}_T{TIER}_{MODEL_OR_GROUP}=<mode>[:<seconds>]
+#
+# Name transformations for env vars:
+#   - Dashes (-) -> Underscores (_)
+#   - Dots (.) -> Underscores (_)
+#   - All UPPERCASE
+#   Example: claude-opus-4.5 -> CLAUDE_OPUS_4_5
+#
+# Tier syntax:
+#   - Single tier: T2 (tier 2)
+#   - Multi-tier: T2_3 (tiers 2 and 3 share config)
+#   - Default: TDEFAULT (fallback for unlisted tiers)
+#
+# Examples:
+# CUSTOM_CAP_ANTIGRAVITY_T2_CLAUDE=100
+# CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T2_CLAUDE=quota_reset
+#
+# CUSTOM_CAP_ANTIGRAVITY_T3_CLAUDE=30
+# CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T3_CLAUDE=offset:3600
+#
+# CUSTOM_CAP_ANTIGRAVITY_TDEFAULT_CLAUDE=80%
+#
+# CUSTOM_CAP_ANTIGRAVITY_T2_3_G25_FLASH=80%
+# CUSTOM_CAP_COOLDOWN_ANTIGRAVITY_T2_3_G25_FLASH=offset:1800
+
 # ------------------------------------------------------------------------------
 # | [ADVANCED] Proxy Configuration                                             |
 # ------------------------------------------------------------------------------
 
 # --- OAuth Refresh Interval ---
 # How often, in seconds, the background refresher should check and refresh
 # expired OAuth tokens.
-OAUTH_REFRESH_INTERVAL=600 # Default is 600 seconds (10 minutes)
+# Default: 600 (10 minutes)
+# OAUTH_REFRESH_INTERVAL=600
 
 # --- Skip OAuth Initialization ---
 # Set to "true" to prevent the proxy from performing the interactive OAuth
 # setup/validation flow on startup. This is highly recommended for non-interactive
 # environments like Docker containers or automated scripts.
 # Ensure your credentials in 'oauth_creds/' are valid before enabling this.
-SKIP_OAUTH_INIT_CHECK=false
+#SKIP_OAUTH_INIT_CHECK=false
+
+# --- Global Request Timeout ---
+# Maximum time (in seconds) a request can wait for an available credential.
+# If all credentials are on cooldown and none will become available within
+# this timeout, the request fails fast with a clear error message.
+# Increase this value if you have limited credentials and want to wait
+# longer for capacity (e.g., when credentials hit rate limits).
+# Default: 30 seconds
+# GLOBAL_TIMEOUT=30
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] HTTP Timeout Configuration                                       |
+# ------------------------------------------------------------------------------
+#
+# Controls timeouts for HTTP requests to provider APIs.
+# All values are in seconds.
+#
+
+# Connection establishment timeout (default: 30)
+# TIMEOUT_CONNECT=30
+
+# Request body send timeout (default: 30)
+# TIMEOUT_WRITE=30
+
+# Connection pool acquisition timeout (default: 60)
+# TIMEOUT_POOL=60
+
+# Read timeout between streaming chunks (default: 300 = 5 minutes)
+# If no data arrives for this duration, the connection is considered stalled.
+# TIMEOUT_READ_STREAMING=300
+
+# Read timeout for non-streaming responses (default: 600 = 10 minutes)
+# Some LLM responses take significant time to generate.
+# TIMEOUT_READ_NON_STREAMING=600
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Antigravity Provider Configuration                               |
+# ------------------------------------------------------------------------------
+#
+# Configuration for the Antigravity (Google Code Assist) provider.
+# These settings control retry behavior and prompt handling.
+#
+
+# --- Empty Response Handling ---
+# When Antigravity returns an empty response (no content, no tool calls),
+# the proxy automatically retries the request, up to this many attempts.
+# Default: 6 attempts
+# ANTIGRAVITY_EMPTY_RESPONSE_ATTEMPTS=6
+
+# Delay in seconds between empty response retries.
+# Default: 3 seconds
+# ANTIGRAVITY_EMPTY_RESPONSE_RETRY_DELAY=3
+
+# --- Malformed Function Call Handling ---
+# When Gemini 3 returns MALFORMED_FUNCTION_CALL (invalid JSON syntax),
+# the proxy injects corrective messages and retries.
+# Default: 2 retries
+# ANTIGRAVITY_MALFORMED_CALL_RETRIES=2
+
+# Delay in seconds between malformed call retries.
+# Default: 1 second
+# ANTIGRAVITY_MALFORMED_CALL_DELAY=1
+
+# --- System Instruction Configuration ---
+# When true, prepend the Antigravity agent system instruction.
+# Default: true
+# ANTIGRAVITY_PREPEND_INSTRUCTION=true
+
+# When true, inject an identity override instruction after the Antigravity prompt.
+# This tells the model to disregard the Antigravity identity.
+# Default: true
+# ANTIGRAVITY_INJECT_IDENTITY_OVERRIDE=true
+
+# When true, use shortened versions of prompts to reduce context bloat.
+# Default: true
+# ANTIGRAVITY_USE_SHORT_PROMPTS=true
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Gemini CLI Provider Configuration                                |
+# ------------------------------------------------------------------------------
+#
+# Configuration for the Gemini CLI (Google Code Assist) provider.
+#
+
+# OAuth callback port for interactive re-authentication.
+# Default: 8085
+# GEMINI_CLI_OAUTH_PORT=8085
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Antigravity OAuth Configuration                                  |
+# ------------------------------------------------------------------------------
+#
+# OAuth callback port for Antigravity interactive re-authentication.
+# Default: 8085 (same as Gemini CLI, shared)
+# ANTIGRAVITY_OAUTH_PORT=8085
+
+# ------------------------------------------------------------------------------
+# | [ADVANCED] Debugging / Logging                                              |
+# ------------------------------------------------------------------------------
+
+# --- LiteLLM Pydantic Warning Suppression ---
+# LiteLLM produces harmless Pydantic serialization warnings during streaming
+# due to a known issue with response types (Message, StreamingChoices) having
+# mismatched field counts. These warnings don't affect functionality.
+# See: https://github.com/BerriAI/litellm/issues/11759
+#
+# NOTE: This is a workaround. Remove once litellm patches the issue above.
+#
+# Set to "0" to show these warnings (useful for debugging).
+# Default: "1" (suppress warnings)
+# SUPPRESS_LITELLM_SERIALIZATION_WARNINGS=1