Commit c7b3712

fix env var names

File tree

13 files changed: +325 -0 lines changed

.devcontainer/Dockerfile

Lines changed: 2 additions & 0 deletions
FROM mcr.microsoft.com/devcontainers/python:alpine3.13

.devcontainer/devcontainer.json

Lines changed: 11 additions & 0 deletions
{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/aws-cli:1": {}
  }
}

.github/workflows/deploy.yaml

Lines changed: 25 additions & 0 deletions
name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    environment: playground
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/defang-github-action@v1.2.0
        with:
          config-env-vars: MODEL
        env:
          MODEL: ${{ secrets.MODEL }}
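
Outside of CI, the same value can also be stored with the Defang CLI (a sketch, assuming the CLI's `config` command):

```bash
# prompts for the value and stores it as sensitive config for the project
defang config set MODEL
```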

.gitignore

Lines changed: 3 additions & 0 deletions
.env
myenv
__pycache__/

README.md

Lines changed: 66 additions & 0 deletions
# Managed LLM with Docker Model Provider

[![1-click-deploy](https://raw.githubusercontent.com/DefangLabs/defang-assets/main/Logos/Buttons/SVG/deploy-with-defang.svg)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-managed-llm-provider-template%26template_owner%3DDefangSamples)

This sample application demonstrates using Managed LLMs with a Docker Model Provider, deployed with Defang.

> Note: This version uses a [Docker Model Provider](https://docs.docker.com/compose/how-tos/model-runner/#provider-services) for managing LLMs. For the version with Defang's [OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway), please see our [*Managed LLM Sample*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm) instead.

The Docker Model Provider lets you run LLMs locally with `docker compose`: it is a service declared with a `provider:` block in the `compose.yaml` file. During deployment, Defang transparently fixes up your project to use AWS Bedrock or Google Cloud Vertex AI models.

You can configure the `LLM_MODEL` and `LLM_URL` for the LLM separately for local development and production environments.
* `LLM_MODEL` is the ID of the LLM model you are using.
* `LLM_URL` is set by Docker locally; during deployment, Defang provides authenticated access to the LLM model in the cloud.
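
For example, a local development run might use values along these lines (illustrative only; the exact model ID depends on which models you have pulled or enabled):

```bash
# Compose derives these variable names from the provider service name ("llm" -> LLM_URL, LLM_MODEL)
LLM_MODEL=ai/smollm2   # illustrative local model ID
# LLM_URL is injected by Docker locally and by Defang in the cloud
```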

Ensure you have enabled model access for the model you intend to use. To do this, check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access).

To learn about the LLM models available in Defang, please see our [Model Mapping documentation](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping).

For more about Managed LLMs in Defang, please see our [Managed LLMs documentation](https://docs.defang.io/docs/concepts/managed-llms/managed-language-models).

### Docker Model Provider

In the `compose.yaml` file, the `llm` service routes requests to the LLM API using a [Docker Model Provider](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#docker-model-provider-services).

The `x-defang-llm` property on the `llm` service must be set to `true` in order to use the Docker Model Provider when deploying with Defang.
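
For illustration, such a provider service might look like the following sketch (the actual `compose.yaml` may differ; the model ID is a placeholder):

```yaml
services:
  llm:
    provider:
      type: model
      options:
        model: ai/smollm2   # placeholder model ID
    x-defang-llm: true      # tells Defang to map this service to Bedrock/Vertex on deploy
```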

## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), authenticate with your cloud provider account
3. (Optional for local development) [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, use the following command:

```bash
docker compose -f compose.local.yaml up --build
```

## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening up your terminal and typing:

```bash
defang compose up
```

### BYOC

If you want to deploy to your own cloud account, you can [use Defang BYOC](https://docs.defang.io/docs/tutorials/deploy-to-your-cloud).

---

Title: Managed LLM with Docker Model Provider

Short Description: An app using Managed LLMs with a Docker Model Provider, deployed with Defang.

Tags: LLM, Python, Bedrock, Vertex, Docker Model Provider

Languages: Python

app/.dockerignore

Lines changed: 27 additions & 0 deletions
# Default .dockerignore file for Defang
**/__pycache__
**/.direnv
**/.DS_Store
**/.envrc
**/.git
**/.github
**/.idea
**/.next
**/.vscode
**/compose.*.yaml
**/compose.*.yml
**/compose.yaml
**/compose.yml
**/docker-compose.*.yaml
**/docker-compose.*.yml
**/docker-compose.yaml
**/docker-compose.yml
**/node_modules
**/Thumbs.db
Dockerfile
*.Dockerfile
# Ignore our own binary, but only in the root to avoid ignoring subfolders
defang
defang.exe
# Ignore our project-level state
.defang

app/Dockerfile

Lines changed: 22 additions & 0 deletions
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Copy requirement files first (for better Docker cache)
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the code
COPY . .

# Expose the port that Uvicorn will run on
EXPOSE 8000

# Set environment variable for the port
ENV PORT=8000

# Run the app via `sh -c` (shell form) so the PORT environment variable is interpolated
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port $PORT"]
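
To sanity-check the image on its own, one could build and run it from the `app/` directory (a sketch; the tag name is arbitrary):

```bash
docker build -t llm-sample-app .
docker run --rm -p 8000:8000 llm-sample-app
```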

app/app.py

Lines changed: 85 additions & 0 deletions
import json
import logging
import os

import requests
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse, JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles

app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure basic logging
logging.basicConfig(level=logging.INFO)

default_openai_base_url = "https://api.openai.com/v1/"

# Set the environment variables for the chat model
LLM_URL = os.getenv("LLM_URL", default_openai_base_url) + "chat/completions"
# Fall back to an OpenAI model if not set in the environment
MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")

# Get the API key for the LLM.
# For development, you have the option to use your local API key.
# In production, the LLM gateway service will override the need for it.
def get_api_key():
    return os.getenv("OPENAI_API_KEY", "")

# Home page form
@app.get("/", response_class=HTMLResponse)
async def home():
    return FileResponse("static/index.html", media_type="text/html")

# Handle form submission
@app.post("/ask", response_class=JSONResponse)
async def ask(prompt: str = Form(...)):
    payload = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }

    reply = get_llm_response(payload)

    return {"prompt": prompt, "reply": reply}

def get_llm_response(payload):
    api_key = get_api_key()
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # Log request details; redact the Authorization header so the API key is not logged
    safe_headers = {**request_headers, "Authorization": "Bearer ***"}
    logging.info(f"Sending POST to {LLM_URL}")
    logging.info(f"Request Headers: {safe_headers}")
    logging.info(f"Request Payload: {payload}")

    try:
        # 60s timeout so a hung request eventually fails instead of waiting forever
        response = requests.post(LLM_URL, headers=request_headers, data=json.dumps(payload), timeout=60)
    except requests.exceptions.HTTPError as errh:
        return f"HTTP error: {errh}"
    except requests.exceptions.ConnectionError as errc:
        return f"Connection error: {errc}"
    except requests.exceptions.Timeout as errt:
        return f"Timeout error: {errt}"
    except requests.exceptions.RequestException as err:
        return f"Unexpected error: {err}"

    if response.status_code == 400:
        return f"Connect Error: {response.status_code} - {response.text}"
    if response.status_code == 500:
        return f"Error from server: {response.status_code} - {response.text}"

    try:
        data = response.json()
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError):
        return "Model returned an unexpected response."

app/requirements.txt

Lines changed: 5 additions & 0 deletions
dotenv
fastapi
python-multipart
requests
uvicorn

app/static/app.js

Lines changed: 14 additions & 0 deletions
async function submitForm(event) {
  event.preventDefault();
  const prompt = document.getElementById('prompt').value;
  document.getElementById('reply').innerHTML = "Loading...";
  const response = await fetch('/ask', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded'
    },
    body: new URLSearchParams({prompt})
  });
  const data = await response.json();
  document.getElementById('reply').innerHTML = data.reply || "No reply found.";
}
