Commit 3de7f9e

Merge pull request #384 from DefangLabs/jordan/refactor-managed-llm-samples

Refactor managed llm samples

2 parents a049169 + 87a6faf, commit 3de7f9e

File tree

10 files changed: +114 -111 lines changed

samples/managed-llm-provider/README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -36,7 +36,7 @@ The `x-defang-llm` property on the `llm` service must be set to `true` in order
 To run the application locally, you can use the following command:
 
 ```bash
-docker compose -f compose.dev.yaml up --build
+docker compose -f compose.local.yaml up --build
 ```
 
 ## Deployment
````

samples/managed-llm-provider/app/app.py

Lines changed: 4 additions & 22 deletions

```diff
@@ -3,10 +3,11 @@
 import os
 
 import requests
-from fastapi import FastAPI, Form, Request
+from fastapi import FastAPI, Form
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import JSONResponse
+from fastapi.responses import FileResponse
 
 app = FastAPI()
 app.mount("/static", StaticFiles(directory="static"), name="static")
@@ -22,33 +23,14 @@
 MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")
 
 # Get the API key for the LLM
-# For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
+# For development, you have the option to use your local API key. In production, the LLM gateway service will override the need for it.
 def get_api_key():
     return os.getenv("OPENAI_API_KEY", "")
 
 # Home page form
 @app.get("/", response_class=HTMLResponse)
 async def home():
-    return """
-    <html>
-        <head>
-            <title>Ask the AI Model</title>
-            <script type="text/javascript" src="./static/app.js"></script>
-        </head>
-        <body>
-            <h1>Ask the AI Model</h1>
-            <form method="post" id="askForm" onsubmit="event.preventDefault(); submitForm(event);">
-                <textarea id="prompt" name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
-                onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.dispatchEvent(new Event('submit', {cancelable:true}));}"></textarea>
-                <br><br>
-                <input type="submit" value="Ask">
-            </form>
-            <hr>
-            <h2>Model's Reply:</h2>
-            <p id="reply"></p>
-        </body>
-    </html>
-    """
+    return FileResponse("static/index.html", media_type="text/html")
 
 # Handle form submission
 @app.post("/ask", response_class=JSONResponse)
```
samples/managed-llm-provider/app/static/index.html (new file)

Lines changed: 18 additions & 0 deletions

```diff
@@ -0,0 +1,18 @@
+<html>
+    <head>
+        <title>Ask the AI Model</title>
+        <script type="text/javascript" src="./static/app.js"></script>
+    </head>
+    <body>
+        <h1>Ask the AI Model</h1>
+        <form method="post" id="askForm" onsubmit="event.preventDefault(); submitForm(event);">
+            <textarea id="prompt" name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+            onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.dispatchEvent(new Event('submit', {cancelable:true}));}"></textarea>
+            <br><br>
+            <input type="submit" value="Ask">
+        </form>
+        <hr>
+        <h2>Model's Reply:</h2>
+        <p id="reply"></p>
+    </body>
+</html>
```

samples/managed-llm-provider/compose.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -7,7 +7,7 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - LLM_MODEL # LLM model ID used
+      - LLM_MODEL=default
       # For other models, see https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping
     healthcheck:
       test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
```

samples/managed-llm/README.md

Lines changed: 5 additions & 5 deletions

````diff
@@ -11,19 +11,19 @@ Using the [Defang OpenAI Access Gateway](#defang-openai-access-gateway), the fea
 
 This allows switching from OpenAI to the Managed LLMs on supported cloud platforms without modifying your application code.
 
-You can configure the `MODEL` and `ENDPOINT_URL` for the LLM separately for local development and production environments.
+You can configure the `MODEL` and `LLM_URL` for the LLM separately for local development and production environments.
 * The `MODEL` is the LLM Model ID you are using.
-* The `ENDPOINT_URL` is the bridge that provides authenticated access to the LLM model.
+* The `LLM_URL` is the bridge that provides authenticated access to the LLM model.
 
 Ensure you have enabled model access for the model you intend to use. To do this, you can check your [AWS Bedrock model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-modify.html) or [GCP Vertex AI model access](https://cloud.google.com/vertex-ai/generative-ai/docs/control-model-access).
 
-To learn about available LLM models in Defang, please see our [Model Mapping documentation](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping). 
+To learn about available LLM models in Defang, please see our [Model Mapping documentation](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping).
 
 For more about Managed LLMs in Defang, please see our [Managed LLMs documentation](https://docs.defang.io/docs/concepts/managed-llms/managed-language-models).
 
 ### Defang OpenAI Access Gateway
 
-In the `compose.yaml` file, the `llm` service is used to route requests to the LLM API model. This is known as the [Defang OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway). 
+In the `compose.yaml` file, the `llm` service is used to route requests to the LLM API model. This is known as the [Defang OpenAI Access Gateway](https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway).
 
 The `x-defang-llm` property on the `llm` service must be set to `true` in order to use the OpenAI Access Gateway when deploying with Defang.
 
@@ -38,7 +38,7 @@ The `x-defang-llm` property on the `llm` service must be set to `true` in order
 To run the application locally, you can use the following command:
 
 ```bash
-docker compose -f compose.dev.yaml up --build
+docker compose -f compose.local.yaml up --build
 ```
 
 ## Deployment
````
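In application code, this separation typically reduces to reading both values from the environment with a local-friendly default. A hedged sketch using the README's names (the OpenAI fallback mirrors the sample app):

```python
import os

# Unset locally -> fall back to OpenAI's public API; in production the
# compose file points LLM_URL at the llm gateway service instead.
LLM_URL = os.getenv("LLM_URL", "https://api.openai.com/v1/")
MODEL = os.getenv("MODEL", "gpt-4-turbo")

print(f"Routing {MODEL} requests through {LLM_URL}")
```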

samples/managed-llm/app/app.py

Lines changed: 47 additions & 78 deletions

```diff
@@ -1,55 +1,40 @@
-import os
 import json
 import logging
-from fastapi import FastAPI, Form, Request
-from fastapi.responses import HTMLResponse
+import os
+
 import requests
+from fastapi import FastAPI, Form
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import JSONResponse
+from fastapi.responses import FileResponse
 
 app = FastAPI()
+app.mount("/static", StaticFiles(directory="static"), name="static")
 
 # Configure basic logging
 logging.basicConfig(level=logging.INFO)
 
+default_openai_base_url = "https://api.openai.com/v1/"
+
 # Set the environment variables for the chat model
-ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
-# Fallback to OpenAI Model if not set in environment
-MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")
+LLM_URL = os.getenv("LLM_URL", default_openai_base_url) + "chat/completions"
+# Fallback LLM Model if not set in environment
+MODEL_ID = os.getenv("LLM_MODEL", "gpt-4-turbo")
 
 # Get the API key for the LLM
-# For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
+# For development, you have the option to use your local API key. In production, the LLM gateway service will override the need for it.
 def get_api_key():
-    return os.getenv("OPENAI_API_KEY", "API key not set")
+    return os.getenv("OPENAI_API_KEY", "")
 
 # Home page form
 @app.get("/", response_class=HTMLResponse)
 async def home():
-    return """
-    <html>
-        <head><title>Ask the AI Model</title></head>
-        <body>
-            <h1>Ask the AI Model</h1>
-            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
-                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
-                onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.submit();}">
-                </textarea>
-                <br><br>
-                <input type="submit" value="Ask">
-            </form>
-        </body>
-
-    </html>
-    """
+    return FileResponse("static/index.html", media_type="text/html")
 
 # Handle form submission
-@app.post("/ask", response_class=HTMLResponse)
+@app.post("/ask", response_class=JSONResponse)
 async def ask(prompt: str = Form(...)):
-    headers = {
-        "Content-Type": "application/json"
-    }
-
-    api_key = get_api_key()
-    headers["Authorization"] = f"Bearer {api_key}"
-
     payload = {
         "model": MODEL_ID,
         "messages": [
@@ -58,59 +43,43 @@ async def ask(prompt: str = Form(...)):
         "stream": False
     }
 
+    reply = get_llm_response(payload)
+
+    return {"prompt": prompt, "reply": reply}
+
+def get_llm_response(payload):
+    api_key = get_api_key()
+    request_headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
     # Log request details
-    logging.info(f"Sending POST to {ENDPOINT_URL}")
-    logging.info(f"Request Headers: {headers}")
-    logging.info(f"Request Payload: {payload}")
+    logging.debug(f"Sending POST to {LLM_URL}")
+    logging.debug(f"Request Headers: {request_headers}")
+    logging.debug(f"Request Payload: {payload}")
 
     response = None
-    reply = None
     try:
-        response = requests.post(f"{ENDPOINT_URL}", headers=headers, data=json.dumps(payload))
+        response = requests.post(f"{LLM_URL}", headers=request_headers, data=json.dumps(payload))
     except requests.exceptions.HTTPError as errh:
-        reply = f"HTTP error:", errh
+        return f"HTTP error:", errh
     except requests.exceptions.ConnectionError as errc:
-        reply = f"Connection error:", errc
+        return f"Connection error:", errc
    except requests.exceptions.Timeout as errt:
-        reply = f"Timeout error:", errt
+        return f"Timeout error:", errt
     except requests.exceptions.RequestException as err:
-        reply = f"Unexpected error:", err
+        return f"Unexpected error:", err
 
-    if response is not None:
-        # logging.info(f"Response Status Code: {response.status_code}")
-        # logging.info(f"Response Headers: {response.headers}")
-        # logging.info(f"Response Body: {response.text}")
-        if response.status_code == 200:
-            data = response.json()
-            try:
-                reply = data["choices"][0]["message"]["content"]
-            except (KeyError, IndexError):
-                reply = "Model returned an unexpected response."
-        elif response.status_code == 400:
-            reply = f"Connect Error: {response.status_code} - {response.text}"
-        elif response.status_code == 500:
-            reply = f"Error from server: {response.status_code} - {response.text}"
-        else:
-            # Log error details
-            reply = f"Error from server: {response.status_code} - {response.text}"
-            logging.error(f"Error from server: {response.status_code} - {response.text}")
+    if response is None:
+        return f"Error: No response from server."
+    if response.status_code == 400:
+        return f"Connect Error: {response.status_code} - {response.text}"
+    if response.status_code == 500:
+        return f"Error from server: {response.status_code} - {response.text}"
 
-    # Return result
-    return f"""
-    <html>
-        <head><title>Ask the AI Model</title></head>
-        <body>
-            <h1>Ask the AI Model</h1>
-            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
-                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
-                onkeydown="if(event.key==='Enter'&&!event.shiftKey){{event.preventDefault();this.form.submit();}}"></textarea><br><br>
-                <input type="submit" value="Ask">
-            </form>
-            <h2>You Asked:</h2>
-            <p>{prompt}</p>
-            <hr>
-            <h2>Model's Reply:</h2>
-            <p>{reply}</p>
-        </body>
-    </html>
-    """
+    try:
+        data = response.json()
+        return data["choices"][0]["message"]["content"]
+    except (KeyError, IndexError):
+        return "Model returned an unexpected response."
```
samples/managed-llm/app/static/app.js (new file)

Lines changed: 14 additions & 0 deletions

```diff
@@ -0,0 +1,14 @@
+async function submitForm(event) {
+    event.preventDefault();
+    const prompt = document.getElementById('prompt').value;
+    document.getElementById('reply').innerHTML = "Loading...";
+    const response = await fetch('/ask', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/x-www-form-urlencoded'
+        },
+        body: new URLSearchParams({prompt})
+    });
+    const data = await response.json();
+    document.getElementById('reply').innerHTML = data.reply || "No reply found.";
+}
```
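`app.js` posts the prompt form-encoded and expects a JSON body with a `reply` field, matching the `/ask` handler above. The same round-trip can be driven from Python as a quick smoke test (the localhost URL assumes the `8000:8000` port mapping from the compose file):

```python
import requests

# Mirrors what static/app.js sends: a form-encoded body, JSON back.
resp = requests.post(
    "http://localhost:8000/ask",
    data={"prompt": "Say hello in one sentence."},
)
resp.raise_for_status()
print(resp.json()["reply"])
```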
samples/managed-llm/app/static/index.html (new file)

Lines changed: 18 additions & 0 deletions

```diff
@@ -0,0 +1,18 @@
+<html>
+    <head>
+        <title>Ask the AI Model</title>
+        <script type="text/javascript" src="./static/app.js"></script>
+    </head>
+    <body>
+        <h1>Ask the AI Model</h1>
+        <form method="post" id="askForm" onsubmit="event.preventDefault(); submitForm(event);">
+            <textarea id="prompt" name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+            onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.dispatchEvent(new Event('submit', {cancelable:true}));}"></textarea>
+            <br><br>
+            <input type="submit" value="Ask">
+        </form>
+        <hr>
+        <h2>Model's Reply:</h2>
+        <p id="reply"></p>
+    </body>
+</html>
```

samples/managed-llm/compose.local.yaml

Lines changed: 4 additions & 2 deletions

```diff
@@ -3,10 +3,12 @@ services:
     extends:
       file: compose.yaml
       service: app
+    volumes:
+      - ./app:/app
   llm:
     extends:
-        file: compose.yaml
-        service: llm
+      file: compose.yaml
+      service: llm
     # if using AWS Bedrock for local development, include this section:
     environment:
       - AWS_REGION=${AWS_REGION} # replace with your AWS region
```

samples/managed-llm/compose.yaml

Lines changed: 2 additions & 2 deletions

```diff
@@ -7,8 +7,8 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - ENDPOINT_URL=http://llm/api/v1/chat/completions # endpoint to the gateway service
-      - MODEL=default # LLM model ID used for the gateway. 
+      - LLM_URL=http://llm/api/v1/ # endpoint to the gateway service
+      - MODEL=default # LLM model ID used for the gateway.
       # For other models, see https://docs.defang.io/docs/concepts/managed-llms/openai-access-gateway#model-mapping
       - OPENAI_API_KEY=FAKE_TOKEN # the actual value will be ignored when using the gateway, but it should match the one in the llm service
     healthcheck:
```
