
Commit 7e1b77f

fix compose, dockerfile + UI improvements

1 parent 8607f87 · commit 7e1b77f

4 files changed: +77 -54 lines changed

samples/managed-llm/app/Dockerfile

Lines changed: 6 additions & 3 deletions
@@ -1,4 +1,4 @@
-FROM python:alpine
+FROM public.ecr.aws/docker/library/python:3.12-slim
 
 # Set working directory
 WORKDIR /app
@@ -15,5 +15,8 @@ COPY . .
 # Expose the port that Uvicorn will run on
 EXPOSE 8000
 
-# Run the app with the correct module path using Uvicorn
-CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000"]
+# Set environment variable for the port
+ENV PORT=8000
+
+# Run the app with the correct module path using shell form to interpolate environment variable
+CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port $PORT"]

samples/managed-llm/app/app.py

Lines changed: 54 additions & 38 deletions
@@ -4,22 +4,19 @@
 from fastapi import FastAPI, Form, Request
 from fastapi.responses import HTMLResponse
 import requests
-import dotenv
 
 app = FastAPI()
 
-# Load environment variables from .env file
-dotenv.load_dotenv()
-
 # Configure basic logging
 logging.basicConfig(level=logging.INFO)
 
-# Set the endpoint URL for the chat model
-# Here, we use the OpenAI API as an example:
-ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1")
+# Set the environment variables for the chat model
+ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
+# Fallback to OpenAI Model if not set in environment
+MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")
 
 # Get the API key for the LLM
-# For development, you can use your local API key. In production, you will need to configure your API key in the LLM gateway service.
+# For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
 def get_api_key():
     return os.getenv("OPENAI_API_KEY", "API key not set")
 
@@ -28,14 +25,18 @@ def get_api_key():
 async def home():
     return """
     <html>
-        <head><title>Ask the Model</title></head>
+        <head><title>Ask the AI Model</title></head>
         <body>
-            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
-            <form method="post" action="/ask">
-                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here..."></textarea><br><br>
+            <h1>Ask the AI Model</h1>
+            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
+                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+                onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.submit();}">
+                </textarea>
+                <br><br>
                 <input type="submit" value="Ask">
             </form>
         </body>
+
     </html>
     """
 
@@ -46,55 +47,70 @@ async def ask(prompt: str = Form(...)):
         "Content-Type": "application/json"
     }
 
-    if not ENDPOINT_URL.startswith("http://localhost"):
-        API_KEY = get_api_key()
-        headers["Authorization"] = f"Bearer {API_KEY}"
-    else:
-        logging.info("Skipping Authorization header for localhost endpoint.")
+    api_key = get_api_key()
+    headers["Authorization"] = f"Bearer {api_key}"
 
     payload = {
-        "model": os.getenv("MODEL", "gpt-4-turbo"),
+        "model": MODEL_ID,
         "messages": [
             {"role": "user", "content": prompt}
         ],
         "stream": False
     }
 
-    # Log request details for debugging
+    # Log request details
     logging.info(f"Sending POST to {ENDPOINT_URL}")
     logging.info(f"Request Headers: {headers}")
     logging.info(f"Request Payload: {payload}")
 
-    response = requests.post(f"{ENDPOINT_URL}/chat/completions", headers=headers, data=json.dumps(payload))
+    response = None
+    reply = None
+    try:
+        response = requests.post(f"{ENDPOINT_URL}", headers=headers, data=json.dumps(payload))
+    except requests.exceptions.HTTPError as errh:
+        reply = f"HTTP error: {errh}"
+    except requests.exceptions.ConnectionError as errc:
+        reply = f"Connection error: {errc}"
+    except requests.exceptions.Timeout as errt:
+        reply = f"Timeout error: {errt}"
+    except requests.exceptions.RequestException as err:
+        reply = f"Unexpected error: {err}"
 
-    if response.status_code == 200:
-        data = response.json()
-        try:
-            reply = data["choices"][0]["message"]["content"]
-        except (KeyError, IndexError):
-            reply = "Model returned an unexpected response."
-    else:
-        # Log error details
-        logging.error(f"Error from server: {response.status_code} - {response.text}")
-        reply = f"Error: {response.status_code} - {response.text}"
+    if response is not None:
+        # logging.info(f"Response Status Code: {response.status_code}")
+        # logging.info(f"Response Headers: {response.headers}")
+        # logging.info(f"Response Body: {response.text}")
+        if response.status_code == 200:
+            data = response.json()
+            try:
+                reply = data["choices"][0]["message"]["content"]
+            except (KeyError, IndexError):
+                reply = "Model returned an unexpected response."
+        elif response.status_code == 400:
+            reply = f"Connect Error: {response.status_code} - {response.text}"
+        elif response.status_code == 500:
+            reply = f"Error from server: {response.status_code} - {response.text}"
+        else:
+            # Log error details
+            reply = f"Error from server: {response.status_code} - {response.text}"
+            logging.error(f"Error from server: {response.status_code} - {response.text}")
 
     # Return result
     return f"""
     <html>
-        <head><title>Ask the Model</title></head>
+        <head><title>Ask the AI Model</title></head>
         <body>
-            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
-            <form method="post" action="/ask">
-                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here...">{prompt}</textarea><br><br>
+            <h1>Ask the AI Model</h1>
+            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
+                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+                onkeydown="if(event.key==='Enter'&&!event.shiftKey){{event.preventDefault();this.form.submit();}}"></textarea><br><br>
                 <input type="submit" value="Ask">
             </form>
+            <h2>You Asked:</h2>
+            <p>{prompt}</p>
             <hr>
             <h2>Model's Reply:</h2>
             <p>{reply}</p>
         </body>
    </html>
    """
-
-@app.get("/health")
-async def health():
-    return {"status": "ok"}

samples/managed-llm/compose.dev.yaml

Lines changed: 1 addition & 1 deletion
@@ -4,6 +4,6 @@ services:
       file: compose.yaml
       service: app
     environment:
-      - ENDPOINT_URL=https://api.openai.com/v1 # endpoint of the LLM used for local testing
+      - ENDPOINT_URL=https://api.openai.com/v1/chat/completions # endpoint of the LLM used for local testing
       - OPENAI_API_KEY=${OPENAI_API_KEY} # your OpenAI API key for local testing
       - MODEL=gpt-4-turbo # LLM model ID used for local testing
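Because app.py now posts to ENDPOINT_URL verbatim (the /chat/completions suffix is no longer appended in code), the dev override must carry the full path. A hypothetical smoke test for this local configuration, assuming OPENAI_API_KEY is exported in the shell:

import os
import requests

# Same defaults as the dev compose file; "ping" is an arbitrary test prompt
url = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
resp = requests.post(
    url,
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": os.getenv("MODEL", "gpt-4-turbo"),
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=30,
)
print(resp.status_code, resp.json()["choices"][0]["message"]["content"])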

samples/managed-llm/compose.yaml

Lines changed: 16 additions & 12 deletions
@@ -7,23 +7,27 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - ENDPOINT_URL=http://llm-gateway:80 # endpoint to the LLM gateway service
-      - OPENAI_API_KEY=optionalkey # this value will be ignored in production when using the gateway
-      - MODEL=${MODEL} # LLM model ID used for the gateway
+      - ENDPOINT_URL=http://llm-gateway/api/v1/chat/completions # endpoint to the gateway service
+      - MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0 # LLM model ID used for the gateway
+      - OPENAI_API_KEY=FAKE_TOKEN # this value will be ignored in production when using the gateway
     healthcheck:
-      test: ["CMD", "wget", "--spider", "http://localhost:8000/health"]
+      test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 5s
 
-  # Defang OpenAI Access Gateway
-  # This service is used to route requests to the LLM API
+  # # Defang OpenAI Access Gateway
+  # # This service is used to route requests to the LLM API
   llm-gateway:
-    x-defang-llm: true # this must be set to true for the LLM gateway to work
+    x-defang-llm: true # this must be set to true in BYOC for the gateway to work
     image: defangio/openai-access-gateway:latest
     ports:
       - target: 80
         published: 80
         protocol: tcp
-        mode: ingress
-    env_file:
-      - .env
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:80/health"]
+        mode: host
+    environment:
+      - OPENAI_API_KEY=FAKE_TOKEN
+      - USE_MODEL_MAPPING=false
+      - DEBUG=true
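The new healthcheck runs inside the app container, so it can only use what the python:3.12-slim base image ships; unlike the previous alpine image, slim has no wget, which is presumably why the check became a python3 one-liner, and it probes / because the /health route was removed from app.py. Unrolled for readability, the one-liner is equivalent to:

import sys
import urllib.request

# Fetch the URL passed as the first argument; any connection failure or
# HTTP error status raises, so the process exits non-zero and the check fails
urllib.request.urlopen(sys.argv[1]).read()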
