Commit e0d02ad

Merge pull request #372 from DefangLabs/linda-managed-llm-provider
Managed LLM (with Provider) Sample
2 parents: 2aebbf8 + 9e1de3a

12 files changed: +340 -0 lines changed

Lines changed: 2 additions & 0 deletions
FROM mcr.microsoft.com/devcontainers/python:alpine3.13
Lines changed: 11 additions & 0 deletions
{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/aws-cli:1": {}
  }
}
Lines changed: 14 additions & 0 deletions
# Default .dockerignore file for Defang
**/__pycache__
**/.git
**/.github
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
Dockerfile
*.Dockerfile
Lines changed: 25 additions & 0 deletions
name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    environment: playground
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/defang-github-action@v1 # action name restored; the exact pinned version was obscured in the source ("[email protected]")
        with:
          config-env-vars: MODEL
        env:
          MODEL: ${{ secrets.MODEL }}
Lines changed: 3 additions & 0 deletions
.env
myenv
__pycache__/
Lines changed: 73 additions & 0 deletions
# Managed LLM with Docker Model Provider

[![1-click-deploy](https://raw.githubusercontent.com/DefangLabs/defang-assets/main/Logos/Buttons/SVG/deploy-with-defang.svg)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-managed-llm-provider-template%26template_owner%3DDefangSamples)

This sample application demonstrates using Managed LLMs with a Docker Model Provider, deployed with Defang.

> Note: This version uses a [Docker Model Provider](https://docs.docker.com/compose/how-tos/model-runner/#provider-services) for managing LLMs. For the version that uses Defang's [OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway), please see our [*Managed LLM Sample*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm) instead.

The Docker Model Provider lets your application use AWS Bedrock or Google Cloud Vertex AI models. It is declared as a service in the `compose.yaml` file.

You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments (a sketch follows this list):
* `MODEL` is the ID of the LLM model you are using.
* `ENDPOINT_URL` is the endpoint that provides authenticated access to the LLM model.
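
For local development, one way to set these is an `environment` block on the app service in `compose.dev.yaml`. A minimal sketch with placeholder values (the PR's actual compose files are not shown in the diff above):

```yaml
# Hypothetical compose.dev.yaml excerpt -- values are placeholders, not from this PR
services:
  app:
    build: ./app
    environment:
      MODEL: "ai/llama3.2"                                # placeholder model ID
      ENDPOINT_URL: "http://llm/api/v1/chat/completions"  # placeholder endpoint
```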

Ensure you have enabled model access for the model you intend to use. To do this, you can check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access).

### Docker Model Provider

In the `compose.yaml` file, the `llm` service routes requests to the LLM API using a [Docker Model Provider](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#docker-model-provider-services).

The `x-defang-llm` property on the `llm` service must be set to `true` in order to use the Docker Model Provider when deploying with Defang.
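
The PR's `compose.yaml` itself does not appear in the diff above, so here is a rough sketch of how such a provider service can be wired up (the service layout, option names, and values below are illustrative assumptions, not copied from this PR):

```yaml
# Illustrative sketch only -- not the compose.yaml from this PR
services:
  app:
    build: ./app
    ports:
      - "8000:8000"
    depends_on:
      - llm

  llm:
    x-defang-llm: true   # required so Defang provisions a managed LLM on deploy
    provider:
      type: model        # declares this service as a Docker Model Provider
      options:
        model: ${MODEL}  # model ID, e.g. set with `defang config set MODEL`
```
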
## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), authenticate with your cloud provider account
3. (Optional, for local development) Install the [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, use the following command:

```bash
docker compose -f compose.dev.yaml up --build
```

## Configuration

For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration):

> Note: If you are using the 1-click deploy option, you can set these values as secrets in your GitHub repository and the action will automatically deploy them for you.

### `MODEL`

The model ID of the LLM you are using for your application. For example, `anthropic.claude-3-5-sonnet-20241022-v2:0`.

```bash
defang config set MODEL
```

## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening up your terminal and typing:

```bash
defang compose up
```

### BYOC

If you want to deploy to your own cloud account, you can [use Defang BYOC](https://docs.defang.io/docs/tutorials/deploy-to-your-cloud).

---

Title: Managed LLM with Docker Model Provider

Short Description: An app using Managed LLMs with a Docker Model Provider, deployed with Defang.

Tags: LLM, Python, Bedrock, Vertex, Docker Model Provider

Languages: Python
Lines changed: 27 additions & 0 deletions
# Default .dockerignore file for Defang
**/__pycache__
**/.direnv
**/.DS_Store
**/.envrc
**/.git
**/.github
**/.idea
**/.next
**/.vscode
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
**/node_modules
**/Thumbs.db
Dockerfile
*.Dockerfile
# Ignore our own binary, but only in the root to avoid ignoring subfolders
defang
defang.exe
# Ignore our project-level state
.defang
Lines changed: 22 additions & 0 deletions
FROM public.ecr.aws/docker/library/python:3.12-slim

# Set working directory
WORKDIR /app

# Copy requirement files first (for better Docker layer caching)
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code
COPY . .

# Expose the port that Uvicorn will run on
EXPOSE 8000

# Set environment variable for the port
ENV PORT=8000

# Run the app via `sh -c` so the $PORT environment variable is interpolated
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port $PORT"]
Lines changed: 116 additions & 0 deletions
import os
import json
import logging

import requests
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse

app = FastAPI()

# Configure basic logging
logging.basicConfig(level=logging.INFO)

# Set the environment variables for the chat model
ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
# Fall back to an OpenAI model if MODEL is not set in the environment
MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")

# Get the API key for the LLM.
# For development, you can use your local API key. In production, the LLM
# gateway service will override the need for it.
def get_api_key():
    return os.getenv("OPENAI_API_KEY", "API key not set")

# Home page form
@app.get("/", response_class=HTMLResponse)
async def home():
    return """
    <html>
      <head><title>Ask the AI Model</title></head>
      <body>
        <h1>Ask the AI Model</h1>
        <form method="post" action="/ask">
          <textarea name="prompt" autofocus rows="5" cols="60" placeholder="Enter your question here..."
            onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.submit();}"></textarea>
          <br><br>
          <input type="submit" value="Ask">
        </form>
      </body>
    </html>
    """

# Handle form submission
@app.post("/ask", response_class=HTMLResponse)
async def ask(prompt: str = Form(...)):
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {get_api_key()}",
    }

    payload = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False,
    }

    # Log request details
    logging.info(f"Sending POST to {ENDPOINT_URL}")
    logging.info(f"Request Headers: {headers}")
    logging.info(f"Request Payload: {payload}")

    response = None
    reply = None
    try:
        # A timeout is set so the Timeout handler below can actually trigger
        response = requests.post(ENDPOINT_URL, headers=headers, data=json.dumps(payload), timeout=60)
    except requests.exceptions.HTTPError as errh:
        reply = f"HTTP error: {errh}"
    except requests.exceptions.ConnectionError as errc:
        reply = f"Connection error: {errc}"
    except requests.exceptions.Timeout as errt:
        reply = f"Timeout error: {errt}"
    except requests.exceptions.RequestException as err:
        reply = f"Unexpected error: {err}"

    if response is not None:
        # logging.info(f"Response Status Code: {response.status_code}")
        # logging.info(f"Response Headers: {response.headers}")
        # logging.info(f"Response Body: {response.text}")
        if response.status_code == 200:
            data = response.json()
            try:
                reply = data["choices"][0]["message"]["content"]
            except (KeyError, IndexError):
                reply = "Model returned an unexpected response."
        elif response.status_code == 400:
            reply = f"Client error: {response.status_code} - {response.text}"
        else:
            # Log error details
            reply = f"Error from server: {response.status_code} - {response.text}"
            logging.error(f"Error from server: {response.status_code} - {response.text}")

    # Return result
    return f"""
    <html>
      <head><title>Ask the AI Model</title></head>
      <body>
        <h1>Ask the AI Model</h1>
        <form method="post" action="/ask">
          <textarea name="prompt" autofocus rows="5" cols="60" placeholder="Enter your question here..."
            onkeydown="if(event.key==='Enter'&&!event.shiftKey){{event.preventDefault();this.form.submit();}}"></textarea>
          <br><br>
          <input type="submit" value="Ask">
        </form>
        <h2>You Asked:</h2>
        <p>{prompt}</p>
        <hr>
        <h2>Model's Reply:</h2>
        <p>{reply}</p>
      </body>
    </html>
    """
Lines changed: 5 additions & 0 deletions
dotenv
fastapi
python-multipart
requests
uvicorn

0 commit comments