diff --git a/samples/managed-llm-provider/.devcontainer/Dockerfile b/samples/managed-llm-provider/.devcontainer/Dockerfile new file mode 100644 index 00000000..63fdc020 --- /dev/null +++ b/samples/managed-llm-provider/.devcontainer/Dockerfile @@ -0,0 +1,2 @@ + +FROM mcr.microsoft.com/devcontainers/python:alpine3.13 diff --git a/samples/managed-llm-provider/.devcontainer/devcontainer.json b/samples/managed-llm-provider/.devcontainer/devcontainer.json new file mode 100644 index 00000000..4c83ef92 --- /dev/null +++ b/samples/managed-llm-provider/.devcontainer/devcontainer.json @@ -0,0 +1,11 @@ +{ + "build": { + "dockerfile": "Dockerfile", + "context": ".." + }, + "features": { + "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {}, + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/aws-cli:1": {} + } +} diff --git a/samples/managed-llm-provider/.dockerignore b/samples/managed-llm-provider/.dockerignore new file mode 100644 index 00000000..08e28b5c --- /dev/null +++ b/samples/managed-llm-provider/.dockerignore @@ -0,0 +1,14 @@ +# Default .dockerignore file for Defang +**/__pycache__ +**/.git +**/.github +**/compose.*.yaml +**/compose.*.yml +**/compose.yaml +**/compose.yml +**/docker-compose.*.yaml +**/docker-compose.*.yml +**/docker-compose.yaml +**/docker-compose.yml +Dockerfile +*.Dockerfile diff --git a/samples/managed-llm-provider/.github/workflows/deploy.yaml b/samples/managed-llm-provider/.github/workflows/deploy.yaml new file mode 100644 index 00000000..0dade795 --- /dev/null +++ b/samples/managed-llm-provider/.github/workflows/deploy.yaml @@ -0,0 +1,25 @@ +name: Deploy + +on: + push: + branches: + - main + +jobs: + deploy: + environment: playground + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + + - name: Deploy + uses: DefangLabs/defang-github-action@v1.2.0 + with: + config-env-vars: MODEL + env: + MODEL: ${{ 
secrets.MODEL }} \ No newline at end of file diff --git a/samples/managed-llm-provider/.gitignore b/samples/managed-llm-provider/.gitignore new file mode 100644 index 00000000..76024c6f --- /dev/null +++ b/samples/managed-llm-provider/.gitignore @@ -0,0 +1,3 @@ +.env +myenv +__pycache__/ diff --git a/samples/managed-llm-provider/README.md b/samples/managed-llm-provider/README.md new file mode 100644 index 00000000..8e825bc6 --- /dev/null +++ b/samples/managed-llm-provider/README.md @@ -0,0 +1,73 @@ +# Managed LLM with Docker Model Provider + +[![1-click-deploy](https://raw.githubusercontent.com/DefangLabs/defang-assets/main/Logos/Buttons/SVG/deploy-with-defang.svg)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-managed-llm-provider-template%26template_owner%3DDefangSamples) + +This sample application demonstrates using Managed LLMs with a Docker Model Provider, deployed with Defang. + +> Note: This version uses a [Docker Model Provider](https://docs.docker.com/compose/how-tos/model-runner/#provider-services) for managing LLMs. For the version with Defang's [OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway), please see our [*Managed LLM Sample*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm) instead. + +The Docker Model Provider allows users to use AWS Bedrock or Google Cloud Vertex AI models with their application. It is a service in the `compose.yaml` file. + +You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments. +* The `MODEL` is the LLM Model ID you are using. +* The `ENDPOINT_URL` is the bridge that provides authenticated access to the LLM model. + +Ensure you have enabled model access for the model you intend to use. 
To do this, you can check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access). + +### Docker Model Provider + +In the `compose.yaml` file, the `llm` service will route requests to the LLM API model using a [Docker Model Provider](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#docker-model-provider-services). + +The `x-defang-llm` property on the `llm` service must be set to `true` in order to use the Docker Model Provider when deploying with Defang. + +## Prerequisites + +1. Download [Defang CLI](https://github.com/DefangLabs/defang) +2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), authenticate with your cloud provider account +3. (Optional for local development) [Docker CLI](https://docs.docker.com/engine/install/) + +## Development + +To run the application locally, you can use the following command: + +```bash +docker compose -f compose.local.yaml up --build +``` + +## Configuration + +For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration): + +> Note that if you are using the 1-click deploy option, you can set these values as secrets in your GitHub repository and the action will automatically deploy them for you. + +### `MODEL` +The Model ID of the LLM you are using for your application. For example, `anthropic.claude-3-5-sonnet-20241022-v2:0`. 
+```bash +defang config set MODEL +``` + +## Deployment + +> [!NOTE] +> Download [Defang CLI](https://github.com/DefangLabs/defang) + +### Defang Playground + +Deploy your application to the Defang Playground by opening up your terminal and typing: +```bash +defang compose up +``` + +### BYOC + +If you want to deploy to your own cloud account, you can [use Defang BYOC](https://docs.defang.io/docs/tutorials/deploy-to-your-cloud). + +--- + +Title: Managed LLM with Docker Model Provider + +Short Description: An app using Managed LLMs with a Docker Model Provider, deployed with Defang. + +Tags: LLM, Python, Bedrock, Vertex, Docker Model Provider + +Languages: Python diff --git a/samples/managed-llm-provider/app/.dockerignore b/samples/managed-llm-provider/app/.dockerignore new file mode 100644 index 00000000..74a7bf9e --- /dev/null +++ b/samples/managed-llm-provider/app/.dockerignore @@ -0,0 +1,27 @@ +# Default .dockerignore file for Defang +**/__pycache__ +**/.direnv +**/.DS_Store +**/.envrc +**/.git +**/.github +**/.idea +**/.next +**/.vscode +**/compose.*.yaml +**/compose.*.yml +**/compose.yaml +**/compose.yml +**/docker-compose.*.yaml +**/docker-compose.*.yml +**/docker-compose.yaml +**/docker-compose.yml +**/node_modules +**/Thumbs.db +Dockerfile +*.Dockerfile +# Ignore our own binary, but only in the root to avoid ignoring subfolders +defang +defang.exe +# Ignore our project-level state +.defang \ No newline at end of file diff --git a/samples/managed-llm-provider/app/Dockerfile b/samples/managed-llm-provider/app/Dockerfile new file mode 100644 index 00000000..d3e172fa --- /dev/null +++ b/samples/managed-llm-provider/app/Dockerfile @@ -0,0 +1,22 @@ +FROM public.ecr.aws/docker/library/python:3.12-slim + +# Set working directory +WORKDIR /app + +# Copy requirement files first (for better Docker cache) +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the code +COPY . . 
"""Minimal FastAPI front end that forwards a user prompt to a chat-completions endpoint.

NOTE(review): this body covers only the Python module `app/app.py`. The
surrounding diff content that shares the same physical lines (Dockerfile tail,
requirements.txt, compose files) is intentionally not reproduced here and must
be kept as-is when applying this change.
"""
import html
import json
import logging
import os

import requests
from fastapi import FastAPI, Form, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse

app = FastAPI()

# Configure basic logging
logging.basicConfig(level=logging.INFO)

# Set the environment variables for the chat model
ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
# Fallback to OpenAI Model if not set in environment
MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")

# Upper bound (seconds) for a single upstream call. Without a timeout,
# `requests` waits indefinitely and the Timeout handler below can never fire.
REQUEST_TIMEOUT_SECONDS = 60


def get_api_key():
    """Return the API key sent to the LLM endpoint.

    Reads `OPENAI_API_KEY` from the environment and falls back to the
    placeholder string "API key not set" when the variable is absent.
    For development, you can use your local API key. In production, the LLM
    gateway service will override the need for it.
    """
    return os.getenv("OPENAI_API_KEY", "API key not set")


def _render_page(result_html: str = "") -> str:
    """Render the single HTML page used by both routes.

    `result_html` is inserted verbatim below the form, so callers must escape
    any untrusted text before passing it in.

    NOTE(review): the original markup was not visible in the reviewed excerpt
    (tags were stripped). This template is reconstructed from what the code
    establishes -- a form that POSTs a `prompt` field to `/ask`, plus the
    visible headings. Confirm attributes/styling against the original.
    """
    return f"""
    <html>
        <head><title>Ask the AI Model</title></head>
        <body>
            <h1>Ask the AI Model</h1>
            <form method="post" action="/ask">
                <textarea name="prompt" rows="5" cols="60" autofocus></textarea>
                <br><br>
                <input type="submit" value="Ask">
            </form>
            {result_html}
        </body>
    </html>
    """


def _extract_reply(response) -> str:
    """Turn an upstream HTTP response into the text shown to the user.

    On HTTP 200 the reply is read from `choices[0].message.content`; a body
    that is not JSON or lacks that path yields a fixed fallback message
    instead of raising. Any other status is logged and reported with its
    status code and body text.
    """
    status = response.status_code
    if status == 200:
        try:
            # response.json() raises a ValueError subclass on a non-JSON body;
            # TypeError covers JSON that is not shaped like a dict/list chain.
            return str(response.json()["choices"][0]["message"]["content"])
        except (KeyError, IndexError, TypeError, ValueError):
            return "Model returned an unexpected response."

    # Log every non-200 outcome (previously only the catch-all branch did).
    logging.error("Error from server: %s - %s", status, response.text)
    if status == 400:
        return f"Connect Error: {status} - {response.text}"
    return f"Error from server: {status} - {response.text}"


# Home page form
@app.get("/", response_class=HTMLResponse)
async def home():
    """Serve the empty prompt form."""
    return _render_page()


# Handle form submission
@app.post("/ask", response_class=HTMLResponse)
async def ask(prompt: str = Form(...)):
    """Send `prompt` to the configured chat endpoint and render the answer.

    Args:
        prompt: text submitted through the form's `prompt` field.

    Returns:
        The HTML page containing the form, the (escaped) prompt, and either
        the model's reply or a human-readable error message.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {get_api_key()}",
    }

    payload = {
        "model": MODEL_ID,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }

    # Log request details, but never write the bearer token to the logs.
    redacted_headers = {**headers, "Authorization": "Bearer ***"}
    logging.info("Sending POST to %s", ENDPOINT_URL)
    logging.info("Request Headers: %s", redacted_headers)
    logging.info("Request Payload: %s", payload)

    try:
        # requests is blocking; run it in the threadpool so the event loop
        # (and therefore other routes such as "/") stays responsive.
        response = await run_in_threadpool(
            requests.post,
            ENDPOINT_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.HTTPError as errh:
        reply = f"HTTP error: {errh}"
    except requests.exceptions.ConnectionError as errc:
        reply = f"Connection error: {errc}"
    except requests.exceptions.Timeout as errt:
        reply = f"Timeout error: {errt}"
    except requests.exceptions.RequestException as err:
        reply = f"Unexpected error: {err}"
    else:
        reply = _extract_reply(response)

    # Both values are untrusted (user input / upstream output): escape them
    # before placing them into the HTML document.
    result_html = f"""
            <h2>You Asked:</h2>
            <p>{html.escape(prompt)}</p>
            <hr>
            <h2>Model's Reply:</h2>
            <p>{html.escape(str(reply))}</p>"""

    # Return result
    return _render_page(result_html)