
Commit 2aebbf8 (2 parents: 4469657 + 8611066)

Merge pull request #369 from DefangLabs/linda-managed-llm

Add Managed LLM sample

File tree

12 files changed: +326 -0 lines changed

.github/workflows/deploy-changed-samples.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -82,6 +82,7 @@ jobs:
           TEST_MB_DB_PASS: ${{ secrets.TEST_MB_DB_PASS }}
           TEST_MB_DB_PORT: ${{ secrets.TEST_MB_DB_PORT }}
           TEST_MB_DB_USER: ${{ secrets.TEST_MB_DB_USER }}
+          TEST_MODEL: ${{ secrets.TEST_MODEL }}
           TEST_MONGO_INITDB_ROOT_USERNAME: ${{ secrets.TEST_MONGO_INITDB_ROOT_USERNAME }}
           TEST_MONGO_INITDB_ROOT_PASSWORD: ${{ secrets.TEST_MONGO_INITDB_ROOT_PASSWORD }}
           TEST_NC_DB: ${{ secrets.TEST_NC_DB }}
```
Lines changed: 2 additions & 0 deletions
```dockerfile
FROM mcr.microsoft.com/devcontainers/python:alpine3.13
```
Lines changed: 11 additions & 0 deletions
```json
{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/aws-cli:1": {}
  }
}
```

samples/managed-llm/.dockerignore

Lines changed: 14 additions & 0 deletions
```
# Default .dockerignore file for Defang
**/__pycache__
**/.git
**/.github
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
Dockerfile
*.Dockerfile
```
Lines changed: 25 additions & 0 deletions
```yaml
name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    environment: playground
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/[email protected]
        with:
          config-env-vars: MODEL
        env:
          MODEL: ${{ secrets.MODEL }}
```

samples/managed-llm/.gitignore

Lines changed: 3 additions & 0 deletions
```
.env
myenv
__pycache__/
```

samples/managed-llm/README.md

Lines changed: 74 additions & 0 deletions
# Managed LLM

[![1-click-deploy](https://raw.githubusercontent.com/DefangLabs/defang-assets/main/Logos/Buttons/SVG/deploy-with-defang.svg)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-managed-llm-template%26template_owner%3DDefangSamples)

This sample application demonstrates the use of OpenAI-compatible Managed LLMs (Large Language Models) with Defang.

> Note: Using a Docker Model Provider? See our [*Managed LLM with Docker Model Provider*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm-provider) sample.

The managed LLM feature, provided by the Defang OpenAI Access Gateway, lets you use AWS Bedrock or Google Cloud Vertex AI through an OpenAI-compatible SDK, so you can switch from OpenAI to one of these cloud-native platforms without modifying your application code.
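As a minimal sketch of what that compatibility looks like in application code (this snippet is not part of the sample, which calls the HTTP API directly with `requests`; note that the SDK takes an API base URL, whereas this sample's `ENDPOINT_URL` holds the full chat-completions URL):

```python
# Minimal sketch, not part of this sample: the same OpenAI SDK code path works
# against OpenAI or the Defang OpenAI Access Gateway; only the environment changes.
import os
from openai import OpenAI

client = OpenAI(
    base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),  # gateway base URL in production
    api_key=os.getenv("OPENAI_API_KEY", "unused-behind-the-gateway"),
)

completion = client.chat.completions.create(
    model=os.getenv("MODEL", "gpt-4-turbo"),
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```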
You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments.
* `MODEL` is the ID of the LLM model you are using.
* `ENDPOINT_URL` is the URL of the gateway that provides authenticated access to the LLM model.

Ensure you have enabled model access for the model you intend to use. To do this, check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access) settings.
## Defang OpenAI Access Gateway

In the `compose.yaml` file, the `llm` service routes requests to the LLM API. This service is known as the Defang OpenAI Access Gateway.

The `x-defang-llm` property on the `llm` service must be set to `true` in order to use the OpenAI Access Gateway when deploying with Defang.
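The `compose.yaml` itself is not shown in this diff; the following is an illustrative sketch of what such a service definition could look like (the gateway image name, ports, and service wiring are assumptions, not taken from this commit):

```yaml
# Illustrative sketch only; the actual compose.yaml is not part of this diff.
services:
  app:
    build:
      context: ./app
    ports:
      - "8000:8000"
    environment:
      MODEL: ${MODEL}
      # In production, point the app at the gateway service instead of OpenAI
      ENDPOINT_URL: http://llm/api/v1/chat/completions
  llm:
    image: defangio/openai-access-gateway  # assumed image name, for illustration
    x-defang-llm: true  # required for Defang to provision the gateway
```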
## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), authenticate with your cloud provider account
3. (Optional, for local development) Install the [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, use the following command:

```bash
docker compose -f compose.dev.yaml up --build
```

## Configuration

For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration):

> Note: If you are using the 1-click deploy option, you can set these values as secrets in your GitHub repository and the action will deploy them for you.

### `MODEL`

The Model ID of the LLM you are using for your application. For example, `anthropic.claude-3-5-sonnet-20241022-v2:0`.

```bash
defang config set MODEL
```

## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening up your terminal and typing:

```bash
defang compose up
```

### BYOC

If you want to deploy to your own cloud account, you can [use Defang BYOC](https://docs.defang.io/docs/tutorials/deploy-to-your-cloud).

---

Title: Managed LLM

Short Description: An app using Managed LLMs with Defang's OpenAI Access Gateway.

Tags: LLM, OpenAI, Python, Bedrock, Vertex

Languages: Python

samples/managed-llm/app/Dockerfile

Lines changed: 22 additions & 0 deletions
```dockerfile
FROM public.ecr.aws/docker/library/python:3.12-slim

# Set working directory
WORKDIR /app

# Copy requirement files first (for better Docker layer caching)
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code
COPY . .

# Expose the port that Uvicorn will run on
EXPOSE 8000

# Set environment variable for the port
ENV PORT=8000

# Run the app via a shell so the PORT environment variable is interpolated
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port $PORT"]
```

samples/managed-llm/app/app.py

Lines changed: 116 additions & 0 deletions
```python
import os
import json
import logging
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
import requests

app = FastAPI()

# Configure basic logging
logging.basicConfig(level=logging.INFO)

# Set the environment variables for the chat model
ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
# Fall back to an OpenAI model if not set in the environment
MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")

# Get the API key for the LLM.
# For development, you can use your local API key. In production, the LLM
# gateway service removes the need for it.
def get_api_key():
    return os.getenv("OPENAI_API_KEY", "API key not set")

# Home page form
@app.get("/", response_class=HTMLResponse)
async def home():
    return """
    <html>
      <head><title>Ask the AI Model</title></head>
      <body>
        <h1>Ask the AI Model</h1>
        <form method="post" action="/ask">
          <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
            onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.submit();}"></textarea>
          <br><br>
          <input type="submit" value="Ask">
        </form>
      </body>
    </html>
    """

# Handle form submission
@app.post("/ask", response_class=HTMLResponse)
async def ask(prompt: str = Form(...)):
    headers = {
        "Content-Type": "application/json"
    }

    api_key = get_api_key()
    headers["Authorization"] = f"Bearer {api_key}"

    payload = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }

    # Log request details
    logging.info(f"Sending POST to {ENDPOINT_URL}")
    logging.info(f"Request Headers: {headers}")
    logging.info(f"Request Payload: {payload}")

    response = None
    reply = None
    try:
        # A timeout is needed for requests to ever raise Timeout below
        response = requests.post(ENDPOINT_URL, headers=headers, data=json.dumps(payload), timeout=60)
    except requests.exceptions.HTTPError as errh:
        reply = f"HTTP error: {errh}"
    except requests.exceptions.ConnectionError as errc:
        reply = f"Connection error: {errc}"
    except requests.exceptions.Timeout as errt:
        reply = f"Timeout error: {errt}"
    except requests.exceptions.RequestException as err:
        reply = f"Unexpected error: {err}"

    if response is not None:
        # logging.info(f"Response Status Code: {response.status_code}")
        # logging.info(f"Response Headers: {response.headers}")
        # logging.info(f"Response Body: {response.text}")
        if response.status_code == 200:
            data = response.json()
            try:
                reply = data["choices"][0]["message"]["content"]
            except (KeyError, IndexError):
                reply = "Model returned an unexpected response."
        elif response.status_code == 400:
            reply = f"Bad request: {response.status_code} - {response.text}"
        else:
            # Log error details
            reply = f"Error from server: {response.status_code} - {response.text}"
            logging.error(f"Error from server: {response.status_code} - {response.text}")

    # Return result
    return f"""
    <html>
      <head><title>Ask the AI Model</title></head>
      <body>
        <h1>Ask the AI Model</h1>
        <form method="post" action="/ask">
          <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
            onkeydown="if(event.key==='Enter'&&!event.shiftKey){{event.preventDefault();this.form.submit();}}"></textarea><br><br>
          <input type="submit" value="Ask">
        </form>
        <h2>You Asked:</h2>
        <p>{prompt}</p>
        <hr>
        <h2>Model's Reply:</h2>
        <p>{reply}</p>
      </body>
    </html>
    """
```
Lines changed: 5 additions & 0 deletions
```
dotenv
fastapi
python-multipart
requests
uvicorn
```
