Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Required: Your OpenAI API key
# Required: Your OpenAI API key(s)
# Single key:
OPENAI_API_KEY="sk-your-openai-api-key-here"

# Multiple keys (comma-separated for load balancing and failover):
# OPENAI_API_KEY="sk-key1,sk-key2,sk-key3"

# Optional: Expected Anthropic API key for client validation
# If set, clients must provide this exact API key to access the proxy
ANTHROPIC_API_KEY="your-expected-anthropic-api-key"
Expand Down Expand Up @@ -34,6 +38,7 @@ MAX_RETRIES="2"

# For Azure OpenAI (recommended if OpenAI is not available in your region):
# OPENAI_API_KEY="your-azure-api-key"
# Multiple Azure keys: OPENAI_API_KEY="azure-key1,azure-key2"
# OPENAI_BASE_URL="https://your-resource-name.openai.azure.com/openai/deployments/your-deployment-name"
# AZURE_API_VERSION="2024-03-01-preview"
# BIG_MODEL="gpt-4"
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,3 @@ poetry.toml
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python
n
39 changes: 38 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ ANTHROPIC_BASE_URL=http://localhost:8082 ANTHROPIC_API_KEY="exact-matching-key"

**Required:**

- `OPENAI_API_KEY` - Your API key for the target provider
- `OPENAI_API_KEY` - Your API key(s) for the target provider
- Single key: `OPENAI_API_KEY="sk-your-key"`
- Multiple keys (comma-separated): `OPENAI_API_KEY="sk-key1,sk-key2,sk-key3"`
- When multiple keys are configured, the proxy load-balances requests across them and fails over automatically

**Security:**

Expand Down Expand Up @@ -134,6 +137,40 @@ SMALL_MODEL="llama3.1:8b"

Any OpenAI-compatible API can be used by setting the appropriate `OPENAI_BASE_URL`.

## Multiple API Keys Support

The proxy now supports multiple OpenAI API keys for improved reliability and load distribution:

### Configuration

```bash
# Multiple keys separated by commas
OPENAI_API_KEY="sk-key1,sk-key2,sk-key3"
```

### Features

- **Load Balancing**: Requests are distributed across all available keys using round-robin
- **Automatic Failover**: If one key fails (rate limit, auth error), the proxy automatically tries the next key
- **Cooldown Management**: Failed keys are temporarily disabled (5 minutes by default) before being retried
- **Status Monitoring**: Check the status of all keys via the `/api-keys/status` endpoint

### Monitoring API Keys

```bash
# Check status of all API keys
curl http://localhost:8082/api-keys/status

# Reset all failed keys (remove from cooldown)
curl -X POST http://localhost:8082/api-keys/reset
```

### Benefits

- **Higher Rate Limits**: Combine rate limits from multiple API keys
- **Better Reliability**: Service continues even if some keys fail
- **Reduced Downtime**: Automatic failover prevents service interruption

## Usage Examples

### Basic Chat
Expand Down
30 changes: 27 additions & 3 deletions src/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
router = APIRouter()

openai_client = OpenAIClient(
config.openai_api_key,
config.openai_api_keys,
config.openai_base_url,
config.request_timeout,
api_version=config.azure_api_version,
Expand Down Expand Up @@ -159,10 +159,14 @@ async def count_tokens(request: ClaudeTokenCountRequest, _: None = Depends(valid
@router.get("/health")
async def health_check():
    """Health check endpoint.

    Returns a JSON-serialisable dict that reports a fixed "healthy" status,
    the current local timestamp, and a summary of the configured API keys
    (total count plus the available/failed counts reported by the client).
    """
    # Snapshot the key status once so both counters come from the same report.
    api_key_status = openai_client.get_api_key_status()
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        # Only the plural `openai_api_keys` setting is consulted; the former
        # duplicate entry that read `config.openai_api_key` was shadowed by
        # this one anyway and referenced the attribute this change replaces.
        "openai_api_configured": bool(config.openai_api_keys),
        "api_key_count": config.get_api_key_count(),
        "available_api_keys": api_key_status["available_keys"],
        "failed_api_keys": api_key_status["failed_keys"],
        "api_key_valid": config.validate_api_key(),
        "client_api_key_validation": bool(config.anthropic_api_key),
    }
Expand Down Expand Up @@ -216,7 +220,8 @@ async def root():
"config": {
"openai_base_url": config.openai_base_url,
"max_tokens_limit": config.max_tokens_limit,
"api_key_configured": bool(config.openai_api_key),
"api_key_configured": bool(config.openai_api_keys),
"api_key_count": config.get_api_key_count(),
"client_api_key_validation": bool(config.anthropic_api_key),
"big_model": config.big_model,
"small_model": config.small_model,
Expand All @@ -226,5 +231,24 @@ async def root():
"count_tokens": "/v1/messages/count_tokens",
"health": "/health",
"test_connection": "/test-connection",
"api_keys_status": "/api-keys/status",
"api_keys_reset": "/api-keys/reset",
},
}


@router.get("/api-keys/status")
async def api_keys_status():
    """Get detailed status of all API keys.

    Returns whatever status report the shared OpenAI client produces,
    unmodified.
    """
    key_report = openai_client.get_api_key_status()
    return key_report


@router.post("/api-keys/reset")
async def reset_api_keys():
    """Reset all failed API keys (remove from cooldown).

    Returns a confirmation message, the local time of the reset, and the
    key status report taken after the reset.
    """
    openai_client.reset_api_key_failures()

    # Timestamp first, then the status snapshot, so the report reflects the
    # state after the reset has been applied.
    reset_time = datetime.now().isoformat()
    status_after_reset = openai_client.get_api_key_status()

    response = {}
    response["message"] = "All API key failures have been reset"
    response["timestamp"] = reset_time
    response["status"] = status_after_reset
    return response
130 changes: 130 additions & 0 deletions src/core/api_key_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import asyncio
import time
from typing import List, Optional, Dict, Set
from threading import Lock
import logging

logger = logging.getLogger(__name__)


class APIKeyManager:
    """Manages multiple OpenAI API keys with round-robin distribution and error handling.

    Keys are handed out in rotation. A key reported as failed is skipped
    until ``cooldown_period`` seconds have elapsed since the failure.
    Public methods take ``self.lock`` around any access to shared state;
    the private helpers assume the caller already holds it (the lock is
    not re-entrant).
    """

    def __init__(self, api_keys: List[str], cooldown_period: int = 300):
        """
        Initialize the API key manager.

        Args:
            api_keys: List of OpenAI API keys
            cooldown_period: Time in seconds to wait before retrying a failed key
        """
        self.api_keys = api_keys
        self.cooldown_period = cooldown_period
        self.current_index = 0
        self.failed_keys: Dict[str, float] = {}  # key -> timestamp of failure
        self.lock = Lock()

        logger.info("Initialized API key manager with %d keys", len(api_keys))

    def _is_in_cooldown(self, api_key: str, now: float) -> bool:
        """Return True if *api_key* failed within the last ``cooldown_period`` seconds.

        Caller must hold ``self.lock``.
        """
        failed_at = self.failed_keys.get(api_key)
        if failed_at is None:
            return False
        # A key becomes usable again only once strictly more than
        # cooldown_period seconds have passed since the failure.
        return now - failed_at <= self.cooldown_period

    @staticmethod
    def _mask_key(api_key: str) -> str:
        """Return a display-safe prefix of *api_key* that never reveals the whole key."""
        if len(api_key) > 10:
            return api_key[:10] + "..."
        # Short keys would otherwise be exposed in full by the status report.
        return "***"

    def get_next_key(self) -> Optional[str]:
        """
        Get the next available API key using round-robin strategy.

        Returns:
            Next available API key or None if all keys are in cooldown
            (or no keys are configured)
        """
        with self.lock:
            now = time.time()

            # Clean up expired cooldowns
            expired_keys = [
                key for key in self.failed_keys
                if not self._is_in_cooldown(key, now)
            ]
            for key in expired_keys:
                del self.failed_keys[key]
                logger.info("API key cooldown expired, key is available again")

            # Visit each key at most once, starting from the rotation cursor.
            total = len(self.api_keys)
            for _ in range(total):
                # Remember the index before advancing, so the log line names
                # the key actually selected (not -1 after wrap-around).
                selected_index = self.current_index
                key = self.api_keys[selected_index]
                self.current_index = (selected_index + 1) % total

                if key not in self.failed_keys:
                    logger.debug("Selected API key index %d", selected_index)
                    return key

            # All keys are in cooldown
            logger.warning("All API keys are in cooldown period")
            return None

    def mark_key_failed(self, api_key: str, error_message: str = ""):
        """
        Mark an API key as failed and put it in cooldown.

        Args:
            api_key: The failed API key
            error_message: Optional error message for logging
        """
        with self.lock:
            self.failed_keys[api_key] = time.time()
            # Log the position only; the key itself must not reach the logs.
            key_index = self.api_keys.index(api_key) if api_key in self.api_keys else -1
            logger.warning(
                "API key (index %d) marked as failed: %s", key_index, error_message
            )

    def get_available_key_count(self) -> int:
        """Get the number of currently available (not in cooldown) API keys."""
        with self.lock:
            now = time.time()
            return sum(
                1 for key in self.api_keys if not self._is_in_cooldown(key, now)
            )

    def get_status(self) -> Dict:
        """Get the current status of all API keys.

        Returns:
            Dict with ``total_keys``, ``available_keys``, ``failed_keys`` and
            ``keys_status``: one entry per key holding its ``index``, a masked
            ``key_prefix``, its ``status`` ("available" or "cooldown") and,
            for keys in cooldown, ``cooldown_remaining`` in whole seconds.
        """
        with self.lock:
            now = time.time()
            status = {
                "total_keys": len(self.api_keys),
                "available_keys": 0,
                "failed_keys": 0,
                "keys_status": [],
            }

            for i, key in enumerate(self.api_keys):
                key_status = {
                    "index": i,
                    "key_prefix": self._mask_key(key),
                    "status": "available",
                }

                if self._is_in_cooldown(key, now):
                    elapsed = now - self.failed_keys[key]
                    key_status["status"] = "cooldown"
                    key_status["cooldown_remaining"] = int(self.cooldown_period - elapsed)
                    status["failed_keys"] += 1
                else:
                    status["available_keys"] += 1

                status["keys_status"].append(key_status)

            return status

    def reset_all_failures(self):
        """Reset all failed keys (remove from cooldown)."""
        with self.lock:
            self.failed_keys.clear()
            logger.info("All API key failures have been reset")
Loading