9 changes: 5 additions & 4 deletions samples/managed-llm-provider/README.md
@@ -6,11 +6,12 @@ This sample application demonstrates using Managed LLMs with a Docker Model Prov

> Note: This version uses a [Docker Model Provider](https://docs.docker.com/compose/how-tos/model-runner/#provider-services) for managing LLMs. For the version with Defang's [OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway), please see our [*Managed LLM Sample*](https://github.com/DefangLabs/samples/tree/main/samples/managed-llm) instead.

The Docker Model Provider allows users to use AWS Bedrock or Google Cloud Vertex AI models with their application. It is a service in the `compose.yaml` file.
The Docker Model Provider allows users to run LLMs locally using `docker compose`. It is a service with `provider:` in the `compose.yaml` file.
Defang will transparently fix up your project to use AWS Bedrock or Google Cloud Vertex AI models during deployment.

You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments.
* The `MODEL` is the LLM Model ID you are using.
* The `ENDPOINT_URL` is the bridge that provides authenticated access to the LLM model.
You can configure the `LLM_MODEL` and `LLM_URL` for the LLM separately for local development and production environments.
* The `LLM_MODEL` is the LLM Model ID you are using.
* The `LLM_URL` will be set by Docker; during deployment, Defang will provide authenticated access to the LLM model in the cloud (see the sketch below).
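
As a minimal sketch of how these two variables are consumed (mirroring the pattern in `app/app.py` from this PR; the fallback values are just illustrative defaults):

```python
import os

# LLM_MODEL: the model ID to request, e.g. a local Model Runner model or a
# Bedrock/Vertex AI model ID in the cloud.
MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")

# LLM_URL: base URL set by Docker locally and pointed at the cloud LLM by
# Defang during deployment; the chat-completions path is appended to it.
LLM_URL = os.getenv("LLM_URL", "https://api.openai.com/v1/") + "chat/completions"
```

Locally, `docker compose up` provides the Model Runner endpoint; in the cloud, Defang rewires `LLM_URL` to Bedrock or Vertex AI and handles authentication.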

Ensure you have enabled model access for the model you intend to use. To do this, you can check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access).

2 changes: 1 addition & 1 deletion samples/managed-llm-provider/app/Dockerfile
@@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/python:3.12-slim
FROM python:3.12-slim

# Set working directory
WORKDIR /app
8 changes: 4 additions & 4 deletions samples/managed-llm-provider/app/app.py
@@ -12,9 +12,9 @@
logging.basicConfig(level=logging.INFO)

# Set the environment variables for the chat model
ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
LLM_URL = os.getenv("LLM_URL", "https://api.openai.com/v1/") + "chat/completions"
# Fallback to OpenAI Model if not set in environment
MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")
MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")

# Get the API key for the LLM
# For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
@@ -60,14 +60,14 @@ async def ask(prompt: str = Form(...)):
}

# Log request details
logging.info(f"Sending POST to {ENDPOINT_URL}")
logging.info(f"Sending POST to {LLM_URL}")
logging.info(f"Request Headers: {headers}")
logging.info(f"Request Payload: {payload}")

response = None
reply = None
try:
response = requests.post(f"{ENDPOINT_URL}", headers=headers, data=json.dumps(payload))
response = requests.post(f"{LLM_URL}", headers=headers, data=json.dumps(payload))
except requests.exceptions.HTTPError as errh:
reply = f"HTTP error:", errh
except requests.exceptions.ConnectionError as errc:
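The payload construction and the rest of the error handling are collapsed in this diff. Purely as a hedged sketch of the same flow, assuming an OpenAI-compatible `messages` payload and an optional `API_KEY` variable (the variable name and fallbacks here are illustrative, not the sample's exact code):

```python
import json
import os

import requests

LLM_URL = os.getenv("LLM_URL", "https://api.openai.com/v1/") + "chat/completions"
MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")
API_KEY = os.getenv("API_KEY")  # optional locally; the provider/gateway handles auth in the cloud


def ask_llm(prompt: str) -> str:
    headers = {"Content-Type": "application/json"}
    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    # OpenAI-style chat-completions payload
    payload = {
        "model": MODEL_ID,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        response = requests.post(LLM_URL, headers=headers, data=json.dumps(payload), timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        # Covers connection, timeout, and HTTP-status errors with a readable message
        return f"Request failed: {err}"

    # The first choice carries the assistant's reply
    return response.json()["choices"][0]["message"]["content"]
```

`RequestException` is the base class for the `HTTPError`/`ConnectionError`/`Timeout` cases the sample handles individually, so the separate error branches collapse into one here.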
7 changes: 5 additions & 2 deletions samples/managed-llm-provider/compose.yaml
@@ -7,19 +7,22 @@ services:
- "8000:8000"
restart: always
environment:
- ENDPOINT_URL=http://llm/api/v1/chat/completions # endpoint to the Provider Service
- MODEL=default # LLM model ID used in the Provider Service
- LLM_MODEL # LLM model ID used
# For other models, see https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping
healthcheck:
test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
interval: 30s
timeout: 5s
retries: 3
start_period: 5s
depends_on:
- llm

# Provider Service
# This service is used to route requests to the LLM API
llm:
provider:
type: model
options:
model: ai/smollm2
x-defang-llm: true
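With the `provider:` service above, Docker (locally) and Defang (in the cloud) supply the connection details that the app reads as `LLM_URL` and `LLM_MODEL`. As an assumption-laden sketch, not part of the sample itself, a startup check like the following could fail fast when those variables are missing:

```python
import logging
import os
import sys

logging.basicConfig(level=logging.INFO)

# LLM_URL is expected from Docker locally (or from Defang in the cloud);
# LLM_MODEL is the model ID used with the provider service.
llm_url = os.getenv("LLM_URL")
llm_model = os.getenv("LLM_MODEL")

if not llm_url:
    logging.error("LLM_URL is not set; is the `llm` provider service configured and running?")
    sys.exit(1)

logging.info("Using model %s via %s", llm_model or "<provider default>", llm_url)
```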
2 changes: 1 addition & 1 deletion samples/managed-llm/app/Dockerfile
@@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/python:3.12-slim
FROM python:3.12-slim

# Set working directory
WORKDIR /app
4 changes: 3 additions & 1 deletion samples/managed-llm/compose.yaml
@@ -1,6 +1,6 @@
services:
app:
build:
build:
context: ./app
dockerfile: Dockerfile
ports:
@@ -17,6 +17,8 @@ services:
timeout: 5s
retries: 3
start_period: 5s
depends_on:
- llm

# Defang OpenAI Access Gateway
# This service is used to route requests to the LLM API
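`depends_on` only orders container startup; it does not wait for the gateway to be ready to serve requests. If readiness matters, a hedged sketch of a small polling loop the app could run before accepting traffic (the URL variable, default, and retry counts are assumptions, not part of the sample):

```python
import os
import time

import requests

# Base URL of the gateway; the variable name and default here are illustrative.
BASE_URL = os.getenv("LLM_URL", "http://llm/api/v1/")


def wait_for_llm(retries: int = 10, delay: float = 2.0) -> bool:
    """Poll the gateway until it accepts connections, or give up."""
    for _ in range(retries):
        try:
            requests.get(BASE_URL, timeout=2)
            return True  # any HTTP response means the service is reachable
        except requests.exceptions.RequestException:
            time.sleep(delay)
    return False


if __name__ == "__main__":
    if not wait_for_llm():
        raise SystemExit("LLM gateway did not become reachable in time")
```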