Commit 8607f87 (parent 4469657)

add managed llm sample

File tree: 13 files changed, +325 −0 lines


.github/workflows/deploy-changed-samples.yml

Lines changed: 1 addition & 0 deletions
@@ -82,6 +82,7 @@ jobs:
       TEST_MB_DB_PASS: ${{ secrets.TEST_MB_DB_PASS }}
       TEST_MB_DB_PORT: ${{ secrets.TEST_MB_DB_PORT }}
       TEST_MB_DB_USER: ${{ secrets.TEST_MB_DB_USER }}
+      TEST_MODEL: ${{ secrets.TEST_MODEL }}
       TEST_MONGO_INITDB_ROOT_USERNAME: ${{ secrets.TEST_MONGO_INITDB_ROOT_USERNAME }}
       TEST_MONGO_INITDB_ROOT_PASSWORD: ${{ secrets.TEST_MONGO_INITDB_ROOT_PASSWORD }}
       TEST_NC_DB: ${{ secrets.TEST_NC_DB }}

samples/managed-llm/.devcontainer/Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
FROM mcr.microsoft.com/devcontainers/python:alpine3.13

samples/managed-llm/.devcontainer/devcontainer.json

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/aws-cli:1": {}
  }
}

samples/managed-llm/.dockerignore

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
# Default .dockerignore file for Defang
**/__pycache__
**/.direnv
**/.DS_Store
**/.envrc
**/.git
**/.github
**/.idea
**/.next
**/.vscode
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
**/node_modules
**/Thumbs.db
Dockerfile
*.Dockerfile
# Ignore our own binary, but only in the root to avoid ignoring subfolders
defang
defang.exe
# Ignore our project-level state
.defang

samples/managed-llm/.github/workflows/deploy.yaml

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    environment: playground
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/defang-github-action@…
        with:
          config-env-vars: MODEL
        env:
          MODEL: ${{ secrets.MODEL }}
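
The `config-env-vars` input lists environment variables for the Defang GitHub Action to submit as Defang config values before deploying. A rough manual equivalent from a terminal (a sketch; the `name=value` form of `defang config set` is an assumption here):

```bash
# Set the config value the workflow would have pushed, then deploy
defang config set MODEL="anthropic.claude-3-5-haiku-20241022-v1:0"  # example model ID from the README
defang compose up
```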

samples/managed-llm/.gitignore

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
.env
myenv
__pycache__/

samples/managed-llm/README.md

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
# Managed LLM

[![1-click-deploy](https://raw.githubusercontent.com/DefangLabs/defang-assets/main/Logos/Buttons/SVG/deploy-with-defang.svg)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-managed-llm-template%26template_owner%3DDefangSamples)

This sample application demonstrates the use of Managed LLMs (Large Language Models) with Defang.

The Managed LLM feature, provided by the Defang OpenAI Access Gateway, lets you switch LLM models in production without changing your app's OpenAI-style API calls.

You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production. `MODEL` is the ID of the LLM model you are using. `ENDPOINT_URL` is the base URL your app sends chat requests to; in production it points at the gateway, which provides authenticated access to the model.
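
For local development, these values can live in a `.env` file in the app directory (the sample's `.gitignore` already excludes `.env`). A minimal sketch; the values below are illustrative, matching the defaults in `app.py`, and are not part of this commit:

```bash
# .env (illustrative local values)
MODEL=gpt-4-turbo
ENDPOINT_URL=https://api.openai.com/v1
OPENAI_API_KEY=sk-...
```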

## Defang OpenAI Access Gateway

In the `compose.yaml` file, the `llm-gateway` service routes OpenAI-style API requests to the underlying LLM provider. This service is known as the Defang OpenAI Access Gateway.

The `x-defang-llm` property on that service must be set to `true` so that Defang enables the Managed LLM feature during deployment.
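
The `compose.yaml` itself is not shown in this excerpt, but based on the description above, the gateway wiring looks roughly like the following sketch. The service layout, image name, and gateway URL are assumptions for illustration, not the commit's actual file:

```yaml
# Hypothetical sketch of the gateway wiring, not the actual compose.yaml from this commit
services:
  app:
    build:
      context: ./app
    ports:
      - "8000:8000"
    environment:
      MODEL: ${MODEL}
      # Point the app at the gateway instead of a provider directly
      ENDPOINT_URL: http://llm-gateway/api/v1
  llm-gateway:
    image: defangio/openai-access-gateway # assumed image name
    x-defang-llm: true # tells Defang to provision managed LLM access on deploy
```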

## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), authenticate with your cloud provider account
3. (Optional, for local development) Install the [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, you can use the following command:

```bash
docker compose -f compose.dev.yaml up --build
```
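
Once the stack is up, you can exercise the app from another terminal. This assumes `compose.dev.yaml` publishes the app on port 8000, the port the app's Dockerfile exposes:

```bash
# Liveness check
curl http://localhost:8000/health

# Submit a prompt the same way the HTML form does (form-encoded POST)
curl -X POST -d "prompt=What can a magic backpack hold?" http://localhost:8000/ask
```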

## Configuration

For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration):

> Note: if you are using the 1-click deploy option, you can set these values as secrets in your GitHub repository and the action will deploy them for you.

### `MODEL`

The Model ID of the LLM you are using for your application, for example `anthropic.claude-3-5-haiku-20241022-v1:0`.

```bash
defang config set MODEL
```

## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening your terminal and typing:

```bash
defang compose up
```

### BYOC

If you want to deploy to your own cloud account, you can [use Defang BYOC](https://docs.defang.io/docs/tutorials/deploy-to-your-cloud).

---

Title: Managed LLM

Short Description: An app using Managed LLMs with Defang's OpenAI Access Gateway.

Tags: LLM, Managed LLM, FastAPI, OpenAI, Python

Languages: Python

samples/managed-llm/app/.dockerignore

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
# Default .dockerignore file for Defang
**/__pycache__
**/.direnv
**/.DS_Store
**/.envrc
**/.git
**/.github
**/.idea
**/.next
**/.vscode
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
**/node_modules
**/Thumbs.db
Dockerfile
*.Dockerfile
# Ignore our own binary, but only in the root to avoid ignoring subfolders
defang
defang.exe
# Ignore our project-level state
.defang

samples/managed-llm/app/Dockerfile

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
FROM python:alpine

# Set working directory
WORKDIR /app

# Copy requirement files first (for better Docker cache)
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code
COPY . .

# Expose the port that Uvicorn will run on
EXPOSE 8000

# Run the app with the correct module path using Uvicorn
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000"]

samples/managed-llm/app/app.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
import os
import json
import logging
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
import requests
import dotenv

app = FastAPI()

# Load environment variables from .env file
dotenv.load_dotenv()

# Configure basic logging
logging.basicConfig(level=logging.INFO)

# Set the endpoint URL for the chat model.
# Here, we use the OpenAI API as an example:
ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1")

# Get the API key for the LLM.
# For development, you can use your local API key. In production, the LLM
# gateway service handles authentication, so the app itself needs no key.
def get_api_key():
    return os.getenv("OPENAI_API_KEY", "API key not set")

# Home page form
@app.get("/", response_class=HTMLResponse)
async def home():
    return """
    <html>
        <head><title>Ask the Model</title></head>
        <body>
            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
            <form method="post" action="/ask">
                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here..."></textarea><br><br>
                <input type="submit" value="Ask">
            </form>
        </body>
    </html>
    """

# Handle form submission
@app.post("/ask", response_class=HTMLResponse)
async def ask(prompt: str = Form(...)):
    headers = {
        "Content-Type": "application/json"
    }

    # Only send an Authorization header to remote endpoints;
    # a local gateway does not need one.
    if not ENDPOINT_URL.startswith("http://localhost"):
        API_KEY = get_api_key()
        headers["Authorization"] = f"Bearer {API_KEY}"
    else:
        logging.info("Skipping Authorization header for localhost endpoint.")

    payload = {
        "model": os.getenv("MODEL", "gpt-4-turbo"),
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }

    # Log request details for debugging (note: this logs the Authorization header too)
    logging.info(f"Sending POST to {ENDPOINT_URL}")
    logging.info(f"Request Headers: {headers}")
    logging.info(f"Request Payload: {payload}")

    response = requests.post(f"{ENDPOINT_URL}/chat/completions", headers=headers, data=json.dumps(payload))

    if response.status_code == 200:
        data = response.json()
        try:
            reply = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError):
            reply = "Model returned an unexpected response."
    else:
        # Log error details
        logging.error(f"Error from server: {response.status_code} - {response.text}")
        reply = f"Error: {response.status_code} - {response.text}"

    # Return the result, echoing the prompt back into the form
    return f"""
    <html>
        <head><title>Ask the Model</title></head>
        <body>
            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
            <form method="post" action="/ask">
                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here...">{prompt}</textarea><br><br>
                <input type="submit" value="Ask">
            </form>
            <hr>
            <h2>Model's Reply:</h2>
            <p>{reply}</p>
        </body>
    </html>
    """

@app.get("/health")
async def health():
    return {"status": "ok"}
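
The `/ask` handler assumes the standard OpenAI chat-completions response shape when it reads `data["choices"][0]["message"]["content"]`. A truncated illustration of that shape, with invented field values:

```json
{
  "choices": [
    {
      "message": {
        "role": "assistant",
        "content": "A magic backpack can hold anything you can imagine."
      }
    }
  ]
}
```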
