From 6ca20e8aa5e128c763f952a5499a73e0cdf63b11 Mon Sep 17 00:00:00 2001
From: Lionello Lunesu
Date: Tue, 20 May 2025 15:59:42 -0700
Subject: [PATCH 1/2] Fix provider llm sample

---
 samples/managed-llm-provider/README.md      | 15 ++++++++-------
 samples/managed-llm-provider/app/Dockerfile |  2 +-
 samples/managed-llm-provider/app/app.py     |  8 ++++----
 samples/managed-llm-provider/compose.yaml   |  7 +++++--
 samples/managed-llm/app/Dockerfile          |  2 +-
 samples/managed-llm/compose.yaml            |  6 ++++--
 6 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/samples/managed-llm-provider/README.md b/samples/managed-llm-provider/README.md
index 543a77af..c7266bd5 100644
--- a/samples/managed-llm-provider/README.md
+++ b/samples/managed-llm-provider/README.md
@@ -6,11 +6,12 @@ This sample application demonstrates using Managed LLMs with a Docker Model Prov
 
 > Note: This version uses a [Docker Model Provider](https://docs.docker.com/compose/how-tos/model-runner/#provider-services) for managing LLMs. For the version with Defang's [OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway), please see our [*Managed LLM Sample*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm) instead.
 
-The Docker Model Provider allows users to use AWS Bedrock or Google Cloud Vertex AI models with their application. It is a service in the `compose.yaml` file.
+The Docker Model Provider allows users to run LLMs locally using `docker compose`. It is a service with `provider:` in the `compose.yaml` file.
+Defang will transparently fix up your project to use AWS Bedrock or Google Cloud Vertex AI models during deployment.
 
-You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments.
-* The `MODEL` is the LLM Model ID you are using.
-* The `ENDPOINT_URL` is the bridge that provides authenticated access to the LLM model.
+You can configure the `LLM_MODEL` and `LLM_URL` for the LLM separately for local development and production environments.
+* The `LLM_MODEL` is the LLM Model ID you are using.
+* The `LLM_URL` will be set by Docker, and during deployment Defang will provide authenticated access to the LLM model in the cloud.
 
 Ensure you have enabled model access for the model you intend to use. To do this, you can check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access).
 
@@ -38,14 +39,14 @@ docker compose -f compose.dev.yaml up --build
 
 ## Configuration
 
-For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration): 
+For this sample, you will need to provide the following [configuration](https://docs.defang.io/docs/concepts/configuration):
 
 > Note that if you are using the 1-click deploy option, you can set these values as secrets in your GitHub repository and the action will automatically deploy them for you.
 
-### `MODEL`
+### `LLM_MODEL`
 
 The Model ID of the LLM you are using for your application. For example, `anthropic.claude-3-haiku-20240307-v1:0`.
 
 ```bash
-defang config set MODEL
+defang config set LLM_MODEL
 ```
 
 ## Deployment
diff --git a/samples/managed-llm-provider/app/Dockerfile b/samples/managed-llm-provider/app/Dockerfile
index d3e172fa..9583776f 100644
--- a/samples/managed-llm-provider/app/Dockerfile
+++ b/samples/managed-llm-provider/app/Dockerfile
@@ -1,4 +1,4 @@
-FROM public.ecr.aws/docker/library/python:3.12-slim
+FROM python:3.12-slim
 
 # Set working directory
 WORKDIR /app
diff --git a/samples/managed-llm-provider/app/app.py b/samples/managed-llm-provider/app/app.py
index 71509ea9..c2b8741a 100644
--- a/samples/managed-llm-provider/app/app.py
+++ b/samples/managed-llm-provider/app/app.py
@@ -12,9 +12,9 @@
 logging.basicConfig(level=logging.INFO)
 
 # Set the environment variables for the chat model
-ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
+LLM_URL = os.getenv("LLM_URL", "https://api.openai.com/v1/") + "chat/completions"
 # Fallback to OpenAI Model if not set in environment
-MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")
+MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")
 
 # Get the API key for the LLM
 # For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
@@ -60,14 +60,14 @@ async def ask(prompt: str = Form(...)):
     }
 
     # Log request details
-    logging.info(f"Sending POST to {ENDPOINT_URL}")
+    logging.info(f"Sending POST to {LLM_URL}")
     logging.info(f"Request Headers: {headers}")
     logging.info(f"Request Payload: {payload}")
 
     response = None
     reply = None
     try:
-        response = requests.post(f"{ENDPOINT_URL}", headers=headers, data=json.dumps(payload))
+        response = requests.post(f"{LLM_URL}", headers=headers, data=json.dumps(payload))
     except requests.exceptions.HTTPError as errh:
         reply = f"HTTP error:", errh
     except requests.exceptions.ConnectionError as errc:
diff --git a/samples/managed-llm-provider/compose.yaml b/samples/managed-llm-provider/compose.yaml
index 22aa83dc..4493e516 100644
--- a/samples/managed-llm-provider/compose.yaml
+++ b/samples/managed-llm-provider/compose.yaml
@@ -7,18 +7,21 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - ENDPOINT_URL=http://llm/api/v1/chat/completions # endpoint to the Provider Service
-      - MODEL=anthropic.claude-3-haiku-20240307-v1:0 # LLM model ID used in the Provider Service
+      - LLM_MODEL
     healthcheck:
       test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
       interval: 30s
       timeout: 5s
       retries: 3
       start_period: 5s
+    depends_on:
+      - llm
 
   # Provider Service
   # This service is used to route requests to the LLM API
   llm:
     provider:
       type: model
+      options:
+        model: ai/smollm2
     x-defang-llm: true
diff --git a/samples/managed-llm/app/Dockerfile b/samples/managed-llm/app/Dockerfile
index d3e172fa..9583776f 100644
--- a/samples/managed-llm/app/Dockerfile
+++ b/samples/managed-llm/app/Dockerfile
@@ -1,4 +1,4 @@
-FROM public.ecr.aws/docker/library/python:3.12-slim
+FROM python:3.12-slim
 
 # Set working directory
 WORKDIR /app
diff --git a/samples/managed-llm/compose.yaml b/samples/managed-llm/compose.yaml
index 31ce0754..f99d3cc4 100644
--- a/samples/managed-llm/compose.yaml
+++ b/samples/managed-llm/compose.yaml
@@ -1,6 +1,6 @@
 services:
   app:
-    build: 
+    build:
       context: ./app
       dockerfile: Dockerfile
     ports:
@@ -8,7 +8,7 @@
     restart: always
     environment:
       - ENDPOINT_URL=http://llm/api/v1/chat/completions # endpoint to the gateway service
-      - MODEL=anthropic.claude-3-haiku-20240307-v1:0 # LLM model ID used for the gateway
+      - MODEL=us.amazon.nova-micro-v1:0 # LLM model ID used for the gateway
       - OPENAI_API_KEY=FAKE_TOKEN # the actual value will be ignored when using the gateway, but it should match the one in the llm service
     healthcheck:
       test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
@@ -16,6 +16,8 @@
       timeout: 5s
       retries: 3
       start_period: 5s
+    depends_on:
+      - llm
 
   # Defang OpenAI Access Gateway
   # This service is used to route requests to the LLM API

From ae3065e51c248281347a522b955648744530b72a Mon Sep 17 00:00:00 2001
From: "Linda L."
Date: Tue, 20 May 2025 17:31:04 -0700
Subject: [PATCH 2/2] Update samples/managed-llm-provider/compose.yaml

---
 samples/managed-llm-provider/compose.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/managed-llm-provider/compose.yaml b/samples/managed-llm-provider/compose.yaml
index bb56d87a..88ac5c43 100644
--- a/samples/managed-llm-provider/compose.yaml
+++ b/samples/managed-llm-provider/compose.yaml
@@ -7,7 +7,7 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - LLM_MODEL=default # LLM model ID used in the Provider Service
+      - LLM_MODEL # LLM model ID used
       # For other models, see https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping
     healthcheck:
       test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]