diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..b7b6e892 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,44 @@ +# Git +.git +.gitignore + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +.env +.venv +env/ +venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Build +*.egg-info/ +dist/ +build/ +.eggs/ + +# Logs (will be mounted as volume) +logs/ + +# OAuth credentials (will be mounted as volume) +oauth_creds/ + +# Documentation +*.md +!README.md + +# GitHub +.github/ + +# Misc +.DS_Store +*.log diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index af6d6cc2..8beb9b6d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: os: [windows-latest, ubuntu-latest, macos-latest] steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up uv uses: astral-sh/setup-uv@v4 @@ -121,7 +121,7 @@ jobs: WHITELISTED_BRANCHES: "main" steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: fetch-depth: 0 diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 00000000..230b1f9c --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,134 @@ +# +name: Create and publish a Docker image + +# Configures this workflow to run every time a change is pushed to the branch called `main` or `dev`. +on: + workflow_dispatch: + inputs: + ref: + description: 'Git ref to build (branch, tag, or commit SHA). Leave empty for default.' + required: false + default: '' + push: + branches: ["main", "dev"] + paths: + - "src/proxy_app/**" + - "src/rotator_library/**" + - ".github/workflows/docker-build.yml" + - "Dockerfile" + - "requirements.txt" + +# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. 
+env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. +jobs: + build-and-push-image: + runs-on: ubuntu-latest + # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. + permissions: + contents: read + packages: write + attestations: write + id-token: write + # + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + ref: ${{ github.event.inputs.ref || '' }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + # Generate version tags based on branch and commit info + - name: Generate image tags + id: tags + run: | + # Get branch name + BRANCH_NAME=${GITHUB_REF#refs/heads/} + + # Generate date-time version (YYYYMMDD-HHMMSS) + DATE_VERSION=$(date -u +'%Y%m%d-%H%M%S') + + # Generate short SHA version + SHORT_SHA=${GITHUB_SHA::7} + + # Combined version tag + VERSION_TAG="${DATE_VERSION}-${SHORT_SHA}" + + # Determine the latest tag based on branch + if [ "$BRANCH_NAME" == "main" ]; then + LATEST_TAG="latest" + else + LATEST_TAG="${BRANCH_NAME}-latest" + fi + + # Set outputs + echo "latest_tag=$LATEST_TAG" >> $GITHUB_OUTPUT + echo "version_tag=$VERSION_TAG" >> $GITHUB_OUTPUT + echo "branch=$BRANCH_NAME" >> $GITHUB_OUTPUT + + echo "Generated tags:" + echo " Latest: $LATEST_TAG" + echo " Version: $VERSION_TAG" + + # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels 
that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=${{ steps.tags.outputs.latest_tag }} + type=raw,value=${{ steps.tags.outputs.version_tag }} + # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. + # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see [Usage](https://github.com/docker/build-push-action#usage) in the README of the `docker/build-push-action` repository. + # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. + - name: Build and push Docker image + id: push + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + provenance: false + sbom: false + + # This step generates an artifact attestation for the image, which is an unforgeable statement about where and how it was built. It increases supply chain security for people who consume the image. For more information, see [Using artifact attestations to establish provenance for builds](/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds). 
+ - name: Generate artifact attestation + uses: actions/attest-build-provenance@v3 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: false + + # Cleanup old container images to keep only the most recent versions + cleanup-old-images: + runs-on: ubuntu-latest + needs: build-and-push-image + permissions: + packages: write + steps: + - name: Delete old container images + uses: actions/delete-package-versions@v5 + with: + package-name: llm-api-key-proxy + package-type: container + min-versions-to-keep: 20 + delete-only-untagged-versions: false + ignore-versions: ".*latest.*" diff --git a/Deployment guide.md b/Deployment guide.md index ac8c2d7b..44c7e033 100644 --- a/Deployment guide.md +++ b/Deployment guide.md @@ -43,7 +43,6 @@ git clone https://github.com/YOUR-USERNAME/LLM-API-Key-Proxy.git cd LLM-API-Key-Proxy ``` - ## Step 3: Assemble Your .env File The proxy uses a `.env` file to store your API keys securely. We'll create this based on the repo's documentation. @@ -51,8 +50,8 @@ The proxy uses a `.env` file to store your API keys securely. We'll create this 1. In your cloned repo, copy the example: `copy .env.example .env` (Windows) or `cp .env.example .env` (macOS/Linux). 2. Open `.env` in a text editor (e.g., Notepad or VS Code). 3. Add your keys following the format from the repo's README and [LiteLLM Providers Documentation](https://docs.litellm.ai/docs/providers): - - **PROXY_API_KEY**: Create a strong, unique secret (e.g., "my-super-secret-proxy-key"). This authenticates requests to your proxy. - - **Provider Keys**: Add keys for your chosen providers. You can add multiple per provider (e.g., _1, _2) for rotation. + - **PROXY_API_KEY**: Create a strong, unique secret (e.g., "my-super-secret-proxy-key"). This authenticates requests to your proxy. + - **Provider Keys**: Add keys for your chosen providers. You can add multiple per provider (e.g., \_1, \_2) for rotation. 
Example `.env` (customize with your real keys): @@ -71,6 +70,7 @@ OPENROUTER_API_KEY_1="your-openrouter-key" - Tip: Start with 1-2 providers to test. Don't share this file publicly! ### Advanced: Stateless Deployment for OAuth Providers (Gemini CLI, Qwen, iFlow) + If you are using providers that require complex OAuth files (like **Gemini CLI**, **Qwen Code**, or **iFlow**), you don't need to upload the JSON files manually. The proxy includes a tool to "export" these credentials into environment variables. 1. Run the credential tool locally: `python -m rotator_library.credential_tool` @@ -79,21 +79,23 @@ If you are using providers that require complex OAuth files (like **Gemini CLI** 4. Copy the contents of this file and paste them directly into your `.env` file or Render's "Environment Variables" section. 5. The proxy will automatically detect and use these variables—no file upload required! - ### Advanced: Antigravity OAuth Provider The Antigravity provider requires OAuth2 authentication similar to Gemini CLI. It provides access to: + - Gemini 2.5 models (Pro/Flash) - Gemini 3 models (Pro/Image-preview) - **requires paid-tier Google Cloud project** - Claude Sonnet 4.5 via Google's Antigravity proxy **Setting up Antigravity locally:** + 1. Run the credential tool: `python -m rotator_library.credential_tool` 2. Select "Add OAuth Credential" and choose "Antigravity" 3. Complete the OAuth flow in your browser 4. The credential is saved to `oauth_creds/antigravity_oauth_1.json` **Exporting for stateless deployment:** + 1. Run: `python -m rotator_library.credential_tool` 2. Select "Export Antigravity to .env" 3. Copy the generated environment variables to your deployment platform: @@ -105,6 +107,7 @@ The Antigravity provider requires OAuth2 authentication similar to Gemini CLI. 
I ``` **Important Notes:** + - Antigravity uses Google OAuth with additional scopes for cloud platform access - Gemini 3 models require a paid-tier Google Cloud project (free tier will fail) - The provider automatically handles thought signature caching for multi-turn conversations @@ -112,7 +115,6 @@ The Antigravity provider requires OAuth2 authentication similar to Gemini CLI. I 4. Save the file. (We'll upload it to Render in Step 5.) - ## Step 4: Create a New Web Service on Render 1. Log in to render.com and go to your Dashboard. @@ -120,13 +122,13 @@ The Antigravity provider requires OAuth2 authentication similar to Gemini CLI. I 3. Choose **Build and deploy from a Git repository** > **Next**. 4. Connect your GitHub account and select your forked repo. 5. In the setup form: - - **Name**: Something like "llm-api-key-proxy". - - **Region**: Choose one close to you (e.g., Oregon for US West). - - **Branch**: "main" (or your default). - - **Runtime**: Python 3. - - **Build Command**: `pip install -r requirements.txt`. - - **Start Command**: `uvicorn src.proxy_app.main:app --host 0.0.0.0 --port $PORT`. - - **Instance Type**: Free (for testing; upgrade later for always-on service). + - **Name**: Something like "llm-api-key-proxy". + - **Region**: Choose one close to you (e.g., Oregon for US West). + - **Branch**: "main" (or your default). + - **Runtime**: Python 3. + - **Build Command**: `pip install -r requirements.txt`. + - **Start Command**: `uvicorn src.proxy_app.main:app --host 0.0.0.0 --port $PORT`. + - **Instance Type**: Free (for testing; upgrade later for always-on service). 6. Click **Create Web Service**. Render will build and deploy—watch the progress in the Events tab. ## Step 5: Upload .env as a Secret File @@ -152,6 +154,7 @@ curl -X POST https://your-service.onrender.com/v1/chat/completions -H "Content-T ``` - Expected: A JSON response with the answer (e.g., "Paris"). + 3. 
Check logs in Render's Dashboard for startup messages (e.g., "RotatingClient initialized"). ## Step 7: Integrate with JanitorAI @@ -176,6 +179,210 @@ That is it. --- +## Appendix: Deploying with Docker + +Docker provides a consistent, portable deployment option for any platform. The proxy image is automatically built and published to GitHub Container Registry (GHCR) on every push to `main` or `dev` branches. + +### Quick Start with Docker Compose + +This is the **fastest way** to deploy the proxy using Docker. + +1. **Create your configuration files:** + +```bash +# Clone the repo (or just download docker-compose.yml and .env.example) +git clone https://github.com/Mirrowel/LLM-API-Key-Proxy.git +cd LLM-API-Key-Proxy + +# Create your .env file +cp .env.example .env +nano .env # Add your PROXY_API_KEY and provider keys + +# Create key_usage.json file (required before first run) +touch key_usage.json +``` + +> **Important:** You must create `key_usage.json` before running Docker Compose. If this file doesn't exist on the host, Docker will create it as a directory instead of a file, causing the container to fail. + +2. **Start the proxy:** + +```bash +docker compose up -d +``` + +3. 
**Verify it's running:** + +```bash +# Check container status +docker compose ps + +# View logs +docker compose logs -f + +# Test the endpoint +curl http://localhost:8000/ +``` + +### Manual Docker Run + +If you prefer not to use Docker Compose: + +```bash +# Create necessary directories and files +mkdir -p oauth_creds logs +touch key_usage.json + +# Run the container +docker run -d \ + --name llm-api-proxy \ + --restart unless-stopped \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + -v $(pwd)/oauth_creds:/app/oauth_creds \ + -v $(pwd)/logs:/app/logs \ + -v $(pwd)/key_usage.json:/app/key_usage.json \ + -e SKIP_OAUTH_INIT_CHECK=true \ + -e PYTHONUNBUFFERED=1 \ + ghcr.io/mirrowel/llm-api-key-proxy:latest +``` + +### Available Image Tags + +| Tag | Description | Use Case | +| ----------------------- | ----------------------------------------------- | -------------------- | +| `latest` | Latest stable build from `main` branch | Production | +| `dev-latest` | Latest build from `dev` branch | Testing new features | +| `YYYYMMDD-HHMMSS-` | Specific version with timestamp and commit hash | Pinned deployments | + +Example using a specific version: + +```bash +docker pull ghcr.io/mirrowel/llm-api-key-proxy:20250106-143022-abc1234 +``` + +### Volume Mounts Explained + +| Host Path | Container Path | Purpose | Mode | +| ------------------ | --------------------- | --------------------------------- | ----------------- | +| `./.env` | `/app/.env` | Configuration and API keys | Read-only (`:ro`) | +| `./oauth_creds/` | `/app/oauth_creds/` | OAuth credential JSON files | Read-write | +| `./logs/` | `/app/logs/` | Request logs and detailed logging | Read-write | +| `./key_usage.json` | `/app/key_usage.json` | Usage statistics persistence | Read-write | + +### Setting Up OAuth Providers with Docker + +OAuth providers (Antigravity, Gemini CLI, Qwen Code, iFlow) require interactive browser authentication. 
Since Docker containers run headless, you must authenticate **outside the container** first. + +#### Option 1: Authenticate Locally, Mount Credentials (Recommended) + +1. **Set up the project locally:** + +```bash +git clone https://github.com/Mirrowel/LLM-API-Key-Proxy.git +cd LLM-API-Key-Proxy +pip install -r requirements.txt +``` + +2. **Run the credential tool and complete OAuth flows:** + +```bash +python -m rotator_library.credential_tool +# Select "Add OAuth Credential" → Choose provider +# Complete authentication in browser +``` + +3. **Deploy with Docker, mounting the oauth_creds directory:** + +```bash +docker compose up -d +# The oauth_creds/ directory is automatically mounted +``` + +#### Option 2: Export Credentials to Environment Variables + +For truly stateless deployments (no mounted credential files): + +1. **Complete OAuth locally as above** + +2. **Export credentials to environment variables:** + +```bash +python -m rotator_library.credential_tool +# Select "Export [Provider] to .env" +``` + +3. **Add the exported variables to your `.env` file:** + +```env +# Example for Antigravity +ANTIGRAVITY_ACCESS_TOKEN="ya29.a0AfB_byD..." +ANTIGRAVITY_REFRESH_TOKEN="1//0gL6dK9..." +ANTIGRAVITY_EXPIRY_DATE="1735901234567" +ANTIGRAVITY_EMAIL="user@gmail.com" +ANTIGRAVITY_CLIENT_ID="1071006060591-..." +ANTIGRAVITY_CLIENT_SECRET="GOCSPX-..." +``` + +4. 
**Deploy with Docker:** + +```bash +docker compose up -d +# Credentials are loaded from .env, no oauth_creds mount needed +``` + +### Development: Building Locally + +For development or customization, use the development compose file: + +```bash +# Build and run from local source +docker compose -f docker-compose.dev.yml up -d --build + +# Rebuild after code changes +docker compose -f docker-compose.dev.yml up -d --build --force-recreate +``` + +### Container Management + +```bash +# Stop the proxy +docker compose down + +# Restart the proxy +docker compose restart + +# View real-time logs +docker compose logs -f + +# Check container resource usage +docker stats llm-api-proxy + +# Update to latest image +docker compose pull +docker compose up -d +``` + +### Docker on Different Platforms + +The image is built for both `linux/amd64` and `linux/arm64` architectures, so it works on: + +- Linux servers (x86_64, ARM64) +- macOS (Intel and Apple Silicon) +- Windows with WSL2/Docker Desktop +- Raspberry Pi 4+ (ARM64) + +### Troubleshooting Docker Deployment + +| Issue | Solution | +| ----------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| Container exits immediately | Check logs: `docker compose logs` — likely missing `.env` or invalid config | +| Permission denied on volumes | Ensure directories exist and have correct permissions: `mkdir -p oauth_creds logs && chmod 755 oauth_creds logs` | +| OAuth credentials not loading | Verify `oauth_creds/` is mounted and contains valid JSON files, or check environment variables are set | +| Port already in use | Change the port mapping: `-p 9000:8000` or edit `docker-compose.yml` | +| Image not updating | Force pull: `docker compose pull && docker compose up -d` | + +--- + ## Appendix: Deploying to a Custom VPS If you're deploying the proxy to a **custom VPS** (DigitalOcean, AWS EC2, Linode, etc.) 
instead of Render.com, you'll encounter special considerations when setting up OAuth providers (Antigravity, Gemini CLI, iFlow). This section covers the professional deployment workflow. @@ -190,12 +397,12 @@ OAuth providers like Antigravity, Gemini CLI, and iFlow require an interactive a The callback servers bind to `localhost` on these ports: -| Provider | Port | Notes | -|---------------|-------|--------------------------------------------| -| **Antigravity** | 51121 | Google OAuth with extended scopes | -| **Gemini CLI** | 8085 | Google OAuth for Gemini API | -| **iFlow** | 11451 | Authorization Code flow with API key fetch | -| **Qwen Code** | N/A | Uses Device Code flow - works on remote VPS ✅ | +| Provider | Port | Notes | +| --------------- | ----- | ---------------------------------------------- | +| **Antigravity** | 51121 | Google OAuth with extended scopes | +| **Gemini CLI** | 8085 | Google OAuth for Gemini API | +| **iFlow** | 11451 | Authorization Code flow with API key fetch | +| **Qwen Code** | N/A | Uses Device Code flow - works on remote VPS ✅ | **The Issue**: When running on a remote VPS, your local browser cannot reach `http://localhost:51121` (or other callback ports) on the remote server, causing authentication to fail with a "connection refused" error. @@ -228,12 +435,14 @@ python -m rotator_library.credential_tool ``` Select **"Add OAuth Credential"** and choose your provider: + - Antigravity -- Gemini CLI +- Gemini CLI - iFlow - Qwen Code (works directly on VPS, but can authenticate locally too) The tool will: + 1. Open your browser automatically 2. Start a local callback server 3. 
Complete the OAuth flow @@ -242,6 +451,7 @@ The tool will: #### Step 3: Export Credentials to Environment Variables Still in the credential tool, select the export option for each provider: + - **"Export Antigravity to .env"** - **"Export Gemini CLI to .env"** - **"Export iFlow to .env"** @@ -350,6 +560,7 @@ python -m rotator_library.credential_tool **Step 4: Export to Environment Variables** Still in the credential tool: + 1. Select the export option for each provider 2. Copy the generated environment variables 3. Add them to `/path/to/LLM-API-Key-Proxy/.env` on your VPS @@ -382,6 +593,7 @@ ls -la /path/to/LLM-API-Key-Proxy/oauth_creds/ ``` Expected files: + - `antigravity_oauth_1.json` - `gemini_cli_oauth_1.json` - `iflow_oauth_1.json` @@ -403,14 +615,14 @@ On your VPS, edit `.env`: ### Environment Variables vs. Credential Files -| Aspect | Environment Variables | Credential Files | -|---------------------------|------------------------------------------|--------------------------------------------| -| **Security** | ✅ More secure (no files on disk) | ⚠️ Files readable if server compromised | -| **Container-Friendly** | ✅ Perfect for Docker/K8s | ❌ Requires volume mounts | -| **Ease of Rotation** | ✅ Update .env and restart | ⚠️ Need to regenerate JSON files | -| **Backup/Version Control**| ✅ Easy to manage with secrets managers | ❌ Binary files, harder to manage | -| **Auto-Refresh** | ✅ Uses refresh tokens | ✅ Uses refresh tokens | -| **Recommended For** | Production deployments | Local development / testing | +| Aspect | Environment Variables | Credential Files | +| -------------------------- | --------------------------------------- | --------------------------------------- | +| **Security** | ✅ More secure (no files on disk) | ⚠️ Files readable if server compromised | +| **Container-Friendly** | ✅ Perfect for Docker/K8s | ❌ Requires volume mounts | +| **Ease of Rotation** | ✅ Update .env and restart | ⚠️ Need to regenerate JSON files | +| **Backup/Version 
Control** | ✅ Easy to manage with secrets managers | ❌ Binary files, harder to manage | +| **Auto-Refresh** | ✅ Uses refresh tokens | ✅ Uses refresh tokens | +| **Recommended For** | Production deployments | Local development / testing | **Best Practice**: Always export to environment variables for VPS/cloud deployments. @@ -539,4 +751,3 @@ chown your-username:your-username .env 7. **Monitor logs** for authentication errors and token refresh issues This approach ensures secure, production-ready deployment while maintaining the convenience of OAuth authentication. - diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..aafcb117 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,49 @@ +# Build stage +FROM python:3.11-slim AS builder + +WORKDIR /app + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Set PATH for user-installed packages in builder stage +ENV PATH=/root/.local/bin:$PATH + +# Copy requirements first for better caching +COPY requirements.txt . 
+ +# Copy the local rotator_library for editable install +COPY src/rotator_library ./src/rotator_library + +# Install dependencies +RUN pip install --no-cache-dir --user -r requirements.txt + +# Production stage +FROM python:3.11-slim + +WORKDIR /app + +# Copy installed packages from builder +COPY --from=builder /root/.local /root/.local + +# Make sure scripts in .local are usable +ENV PATH=/root/.local/bin:$PATH + +# Copy application code +COPY src/ ./src/ + +# Create directories for logs and oauth credentials +RUN mkdir -p logs oauth_creds + +# Expose the default port +EXPOSE 8000 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONPATH=/app/src + +# Default command - runs proxy with the correct PYTHONPATH +CMD ["python", "src/proxy_app/main.py", "--port", "8000"] diff --git a/README.md b/README.md index c6206392..cd650e5e 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A self-hosted proxy that provides a single, OpenAI-compatible API endpoint for all your LLM providers. Works with any application that supports custom OpenAI base URLs—no code changes required in your existing tools. This project consists of two components: + 1. **The API Proxy** — A FastAPI application providing a universal `/v1/chat/completions` endpoint 2. 
**The Resilience Library** — A reusable Python library for intelligent API key management, rotation, and failover @@ -39,6 +40,35 @@ chmod +x proxy_app ./proxy_app ``` +### Docker + +**Using the pre-built image (recommended):** + +```bash +# Pull and run directly +docker run -d \ + --name llm-api-proxy \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + -v $(pwd)/oauth_creds:/app/oauth_creds \ + -v $(pwd)/logs:/app/logs \ + -e SKIP_OAUTH_INIT_CHECK=true \ + ghcr.io/mirrowel/llm-api-key-proxy:latest +``` + +**Using Docker Compose:** + +```bash +# Create your .env file and key_usage.json first, then: +cp .env.example .env +touch key_usage.json +docker compose up -d +``` + +> **Important:** You must create both `.env` and `key_usage.json` files before running Docker Compose. If `key_usage.json` doesn't exist, Docker will create it as a directory instead of a file, causing errors. + +> **Note:** For OAuth providers, complete authentication locally first using the credential tool, then mount the `oauth_creds/` directory or export credentials to environment variables. + ### From Source ```bash @@ -133,13 +163,15 @@ In your configuration file (e.g., `config.json`): ```json { - "models": [{ - "title": "Gemini via Proxy", - "provider": "openai", - "model": "gemini/gemini-2.5-flash", - "apiBase": "http://127.0.0.1:8000/v1", - "apiKey": "your-proxy-api-key" - }] + "models": [ + { + "title": "Gemini via Proxy", + "provider": "openai", + "model": "gemini/gemini-2.5-flash", + "apiBase": "http://127.0.0.1:8000/v1", + "apiKey": "your-proxy-api-key" + } + ] } ``` @@ -322,12 +354,14 @@ The proxy includes a powerful text-based UI for configuration and management. 
🔌 Provider-Specific Features **Gemini CLI:** + - Zero-config Google Cloud project discovery - Internal API access with higher rate limits - Automatic fallback to preview models on rate limit - Paid vs free tier detection **Antigravity:** + - Gemini 3 Pro with `thinkingLevel` support - Gemini 2.5 Flash/Flash Lite with thinking mode - Claude Opus 4.5 (thinking mode) @@ -338,22 +372,25 @@ The proxy includes a powerful text-based UI for configuration and management. - Quota baseline tracking with background refresh - Parallel tool usage instruction injection - **Quota Groups**: Models that share quota are automatically grouped: - - Claude/GPT-OSS: `claude-sonnet-4-5`, `claude-opus-4-5`, `gpt-oss-120b-medium` - - Gemini 3 Pro: `gemini-3-pro-high`, `gemini-3-pro-low`, `gemini-3-pro-preview` - - Gemini 2.5 Flash: `gemini-2.5-flash`, `gemini-2.5-flash-thinking`, `gemini-2.5-flash-lite` - - All models in a group deplete the usage of the group equally. So in claude group - it is beneficial to use only Opus, and forget about Sonnet and GPT-OSS. + - Claude/GPT-OSS: `claude-sonnet-4-5`, `claude-opus-4-5`, `gpt-oss-120b-medium` + - Gemini 3 Pro: `gemini-3-pro-high`, `gemini-3-pro-low`, `gemini-3-pro-preview` + - Gemini 2.5 Flash: `gemini-2.5-flash`, `gemini-2.5-flash-thinking`, `gemini-2.5-flash-lite` + - All models in a group deplete the usage of the group equally. So in claude group - it is beneficial to use only Opus, and forget about Sonnet and GPT-OSS. **Qwen Code:** + - Dual auth (API key + OAuth Device Flow) - `` tag parsing as `reasoning_content` - Tool schema cleaning **iFlow:** + - Dual auth (API key + OAuth Authorization Code) - Hybrid auth with separate API key fetch - Tool schema cleaning **NVIDIA NIM:** + - Dynamic model discovery - DeepSeek thinking support @@ -414,12 +451,14 @@ The proxy includes a powerful text-based UI for configuration and management. Control which models are exposed through your proxy. 
### Blacklist Only + ```env # Hide all preview models IGNORE_MODELS_OPENAI="*-preview*" ``` ### Pure Whitelist Mode + ```env # Block all, then allow specific models IGNORE_MODELS_GEMINI="*" @@ -427,6 +466,7 @@ WHITELIST_MODELS_GEMINI="gemini-2.5-pro,gemini-2.5-flash" ``` ### Exemption Mode + ```env # Block preview models, but allow one specific preview IGNORE_MODELS_OPENAI="*-preview*" @@ -497,6 +537,7 @@ TIMEOUT_READ_NON_STREAMING=600 # Full response wait (10 min) ``` **Recommendations:** + - Long thinking tasks: Increase `TIMEOUT_READ_STREAMING` to 300-360s - Unstable network: Increase `TIMEOUT_CONNECT` to 60s - Large outputs: Increase `TIMEOUT_READ_NON_STREAMING` to 900s+ @@ -513,12 +554,14 @@ TIMEOUT_READ_NON_STREAMING=600 # Full response wait (10 min) Uses Google OAuth to access internal Gemini endpoints with higher rate limits. **Setup:** + 1. Run `python -m rotator_library.credential_tool` 2. Select "Add OAuth Credential" → "Gemini CLI" 3. Complete browser authentication 4. Credentials saved to `oauth_creds/gemini_cli_oauth_1.json` **Features:** + - Zero-config project discovery - Automatic free-tier project onboarding - Paid vs free tier detection @@ -575,6 +618,7 @@ GEMINI_CLI_QUOTA_REFRESH_INTERVAL=300 # Quota refresh interval in seconds (defa Access Google's internal Antigravity API for cutting-edge models. **Supported Models:** + - **Gemini 3 Pro** — with `thinkingLevel` support (low/high) - **Gemini 2.5 Flash** — with thinking mode support - **Gemini 2.5 Flash Lite** — configurable thinking budget @@ -583,11 +627,13 @@ Access Google's internal Antigravity API for cutting-edge models. - **GPT-OSS 120B** — OpenAI-compatible model **Setup:** + 1. Run `python -m rotator_library.credential_tool` 2. Select "Add OAuth Credential" → "Antigravity" 3. 
Complete browser authentication **Advanced Features:** + - Thought signature caching for multi-turn conversations - Tool hallucination prevention via parameter signature injection - Automatic thinking block sanitization for Claude @@ -596,6 +642,7 @@ Access Google's internal Antigravity API for cutting-edge models. - Parallel tool usage instruction injection for Claude **Environment Variables:** + ```env ANTIGRAVITY_ACCESS_TOKEN="ya29.your-access-token" ANTIGRAVITY_REFRESH_TOKEN="1//your-refresh-token" @@ -619,12 +666,14 @@ ANTIGRAVITY_PARALLEL_TOOL_INSTRUCTION_CLAUDE=true # Parallel tool instruction f Uses OAuth Device Flow for Qwen/Dashscope APIs. **Setup:** + 1. Run the credential tool 2. Select "Add OAuth Credential" → "Qwen Code" 3. Enter the code displayed in your browser 4. Or add API key directly: `QWEN_CODE_API_KEY_1="your-key"` **Features:** + - Dual auth (API key or OAuth) - `` tag parsing as `reasoning_content` - Automatic tool schema cleaning @@ -638,12 +687,14 @@ Uses OAuth Device Flow for Qwen/Dashscope APIs. Uses OAuth Authorization Code flow with local callback server. **Setup:** + 1. Run the credential tool 2. Select "Add OAuth Credential" → "iFlow" 3. Complete browser authentication (callback on port 11451) 4. Or add API key directly: `IFLOW_API_KEY_1="sk-your-key"` **Features:** + - Dual auth (API key or OAuth) - Hybrid auth (OAuth token fetches separate API key) - Automatic tool schema cleaning @@ -657,12 +708,14 @@ Uses OAuth Authorization Code flow with local callback server. For platforms without file persistence (Railway, Render, Vercel): 1. **Set up credentials locally:** + ```bash python -m rotator_library.credential_tool # Complete OAuth flows ``` 2. 
**Export to environment variables:** + ```bash python -m rotator_library.credential_tool # Select "Export [Provider] to .env" @@ -680,11 +733,11 @@ For platforms without file persistence (Railway, Render, Vercel): Customize OAuth callback ports if defaults conflict: -| Provider | Default Port | Environment Variable | -|----------|-------------|---------------------| -| Gemini CLI | 8085 | `GEMINI_CLI_OAUTH_PORT` | -| Antigravity | 51121 | `ANTIGRAVITY_OAUTH_PORT` | -| iFlow | 11451 | `IFLOW_OAUTH_PORT` | +| Provider | Default Port | Environment Variable | +| ----------- | ------------ | ------------------------ | +| Gemini CLI | 8085 | `GEMINI_CLI_OAUTH_PORT` | +| Antigravity | 51121 | `ANTIGRAVITY_OAUTH_PORT` | +| iFlow | 11451 | `IFLOW_OAUTH_PORT` | @@ -706,6 +759,7 @@ Options: ``` **Examples:** + ```bash # Run on custom port python src/proxy_app/main.py --host 127.0.0.1 --port 9000 @@ -725,6 +779,7 @@ python src/proxy_app/main.py --add-credential See the [Deployment Guide](Deployment%20guide.md) for complete instructions. **Quick Setup:** + 1. Fork the repository 2. Create a `.env` file with your credentials 3. Create a new Web Service pointing to your repo @@ -738,14 +793,95 @@ Export OAuth credentials to environment variables using the credential tool, the
-Custom VPS / Docker +Docker + +The proxy is available as a multi-architecture Docker image (amd64/arm64) from GitHub Container Registry. + +**Quick Start with Docker Compose:** + +```bash +# 1. Create your .env file with PROXY_API_KEY and provider keys +cp .env.example .env +nano .env + +# 2. Create key_usage.json file (required before first run) +touch key_usage.json + +# 3. Start the proxy +docker compose up -d + +# 4. Check logs +docker compose logs -f +``` + +> **Important:** You must create `key_usage.json` before running Docker Compose. If this file doesn't exist on the host, Docker will create it as a directory instead of a file, causing the container to fail. + +**Manual Docker Run:** + +```bash +# Create key_usage.json if it doesn't exist +touch key_usage.json + +docker run -d \ + --name llm-api-proxy \ + --restart unless-stopped \ + -p 8000:8000 \ + -v $(pwd)/.env:/app/.env:ro \ + -v $(pwd)/oauth_creds:/app/oauth_creds \ + -v $(pwd)/logs:/app/logs \ + -v $(pwd)/key_usage.json:/app/key_usage.json \ + -e SKIP_OAUTH_INIT_CHECK=true \ + -e PYTHONUNBUFFERED=1 \ + ghcr.io/mirrowel/llm-api-key-proxy:latest +``` + +**Development with Local Build:** + +```bash +# Build and run locally +docker compose -f docker-compose.dev.yml up -d --build +``` + +**Volume Mounts:** + +| Path | Purpose | +| ---------------- | -------------------------------------- | +| `.env` | Configuration and API keys (read-only) | +| `oauth_creds/` | OAuth credential files (persistent) | +| `logs/` | Request logs and detailed logging | +| `key_usage.json` | Usage statistics persistence | + +**Image Tags:** + +| Tag | Description | +| ----------------------- | ------------------------------------------ | +| `latest` | Latest stable from `main` branch | +| `dev-latest` | Latest from `dev` branch | +| `YYYYMMDD-HHMMSS-<sha>` | Specific version with timestamp and commit | + +**OAuth with Docker:** + +For OAuth providers (Antigravity, Gemini CLI, etc.), you must authenticate locally first: + +1. 
Run `python -m rotator_library.credential_tool` on your local machine +2. Complete OAuth flows in browser +3. Either: + - Mount `oauth_creds/` directory to container, or + - Export credentials to `.env` using the export option + +
+ +
+Custom VPS / Systemd **Option 1: Authenticate locally, deploy credentials** + 1. Complete OAuth flows on your local machine 2. Export to environment variables 3. Deploy `.env` to your server **Option 2: SSH Port Forwarding** + ```bash # Forward callback ports through SSH ssh -L 51121:localhost:51121 -L 8085:localhost:8085 user@your-vps @@ -754,6 +890,7 @@ ssh -L 51121:localhost:51121 -L 8085:localhost:8085 user@your-vps ``` **Systemd Service:** + ```ini [Unit] Description=LLM API Key Proxy @@ -789,6 +926,7 @@ See [VPS Deployment](Deployment%20guide.md#appendix-deploying-to-a-custom-vps) f **Detailed Logs:** When `--enable-request-logging` is enabled, check `logs/detailed_logs/` for: + - `request.json` — Exact request payload - `final_response.json` — Complete response or error - `streaming_chunks.jsonl` — All SSE chunks received @@ -810,5 +948,6 @@ When `--enable-request-logging` is enabled, check `logs/detailed_logs/` for: ## License This project is dual-licensed: + - **Proxy Application** (`src/proxy_app/`) — [MIT License](src/proxy_app/LICENSE) - **Resilience Library** (`src/rotator_library/`) — [LGPL-3.0](src/rotator_library/COPYING.LESSER) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 00000000..36458929 --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,30 @@ +services: + llm-proxy: + build: + context: . 
+ dockerfile: Dockerfile + container_name: llm-api-proxy-dev + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + ports: + - "${PORT:-8000}:8000" + volumes: + # Mount .env files for configuration + - ./.env:/app/.env:ro + # Mount oauth_creds directory for OAuth credentials persistence + - ./oauth_creds:/app/oauth_creds + # Mount logs directory for persistent logging + - ./logs:/app/logs + # Mount key_usage.json for usage statistics persistence + - ./key_usage.json:/app/key_usage.json + # Optionally mount additional .env files (e.g., combined credential files) + # - ./antigravity_all_combined.env:/app/antigravity_all_combined.env:ro + environment: + # Skip OAuth interactive initialization in container (non-interactive) + - SKIP_OAUTH_INIT_CHECK=true + # Ensure Python output is not buffered + - PYTHONUNBUFFERED=1 diff --git a/docker-compose.tls.yml b/docker-compose.tls.yml new file mode 100644 index 00000000..e210423f --- /dev/null +++ b/docker-compose.tls.yml @@ -0,0 +1,47 @@ +services: + nginx-proxy-manager: + image: "jc21/nginx-proxy-manager:latest" + container_name: nginx-proxy-manager + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + ports: + - "80:80" # Public HTTP + - "443:443" # Public HTTPS + - "81:81" # Admin Web Interface + volumes: + - ./data:/data + - ./letsencrypt:/etc/letsencrypt + # This allows the proxy to talk to other containers using "host.docker.internal" + extra_hosts: + - "host.docker.internal:host-gateway" + llm-proxy: + image: ghcr.io/mirrowel/llm-api-key-proxy:latest + container_name: llm-api-proxy-tls + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + ports: + - "${PORT:-8000}:8000" + volumes: + # Mount .env files for configuration + - ./.env:/app/.env:ro + # Mount oauth_creds directory for OAuth credentials persistence + - ./oauth_creds:/app/oauth_creds + # Mount logs directory 
for persistent logging + - ./logs:/app/logs + # Mount key_usage.json for usage statistics persistence + - ./key_usage.json:/app/key_usage.json + # Optionally mount additional .env files (e.g., combined credential files) + # - ./antigravity_all_combined.env:/app/antigravity_all_combined.env:ro + environment: + # Skip OAuth interactive initialization in container (non-interactive) + - SKIP_OAUTH_INIT_CHECK=true + # Ensure Python output is not buffered + - PYTHONUNBUFFERED=1 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..31964b60 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,28 @@ +services: + llm-proxy: + image: ghcr.io/mirrowel/llm-api-key-proxy:latest + container_name: llm-api-proxy + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + ports: + - "${PORT:-8000}:8000" + volumes: + # Mount .env files for configuration + - ./.env:/app/.env:ro + # Mount oauth_creds directory for OAuth credentials persistence + - ./oauth_creds:/app/oauth_creds + # Mount logs directory for persistent logging + - ./logs:/app/logs + # Mount key_usage.json for usage statistics persistence + - ./key_usage.json:/app/key_usage.json + # Optionally mount additional .env files (e.g., combined credential files) + # - ./antigravity_all_combined.env:/app/antigravity_all_combined.env:ro + environment: + # Skip OAuth interactive initialization in container (non-interactive) + - SKIP_OAUTH_INIT_CHECK=true + # Ensure Python output is not buffered + - PYTHONUNBUFFERED=1