
Commit 7e1b77f

fix compose, dockerfile + UI improvements

1 parent 8607f87 · commit 7e1b77f

4 files changed: +77 -54 lines changed

samples/managed-llm/app/Dockerfile

Lines changed: 6 additions & 3 deletions
@@ -1,4 +1,4 @@
-FROM python:alpine
+FROM public.ecr.aws/docker/library/python:3.12-slim
 
 # Set working directory
 WORKDIR /app
@@ -15,5 +15,8 @@ COPY . .
 # Expose the port that Uvicorn will run on
 EXPOSE 8000
 
-# Run the app with the correct module path using Uvicorn
-CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000"]
+# Set environment variable for the port
+ENV PORT=8000
+
+# Run the app with the correct module path using shell form to interpolate environment variable
+CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port $PORT"]

samples/managed-llm/app/app.py

Lines changed: 54 additions & 38 deletions
@@ -4,22 +4,19 @@
 from fastapi import FastAPI, Form, Request
 from fastapi.responses import HTMLResponse
 import requests
-import dotenv
 
 app = FastAPI()
 
-# Load environment variables from .env file
-dotenv.load_dotenv()
-
 # Configure basic logging
 logging.basicConfig(level=logging.INFO)
 
-# Set the endpoint URL for the chat model
-# Here, we use the OpenAI API as an example:
-ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1")
+# Set the environment variables for the chat model
+ENDPOINT_URL = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
+# Fallback to OpenAI Model if not set in environment
+MODEL_ID = os.getenv("MODEL", "gpt-4-turbo")
 
 # Get the API key for the LLM
-# For development, you can use your local API key. In production, you will need to configure your API key in the LLM gateway service.
+# For development, you can use your local API key. In production, the LLM gateway service will override the need for it.
 def get_api_key():
     return os.getenv("OPENAI_API_KEY", "API key not set")
 
@@ -28,14 +25,18 @@ def get_api_key():
 async def home():
     return """
     <html>
-        <head><title>Ask the Model</title></head>
+        <head><title>Ask the AI Model</title></head>
         <body>
-            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
-            <form method="post" action="/ask">
-                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here..."></textarea><br><br>
+            <h1>Ask the AI Model</h1>
+            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
+                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+                onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();this.form.submit();}">
+                </textarea>
+                <br><br>
                 <input type="submit" value="Ask">
             </form>
         </body>
+
     </html>
     """
 
@@ -46,55 +47,70 @@ async def ask(prompt: str = Form(...)):
         "Content-Type": "application/json"
     }
 
-    if not ENDPOINT_URL.startswith("http://localhost"):
-        API_KEY = get_api_key()
-        headers["Authorization"] = f"Bearer {API_KEY}"
-    else:
-        logging.info("Skipping Authorization header for localhost endpoint.")
+    api_key = get_api_key()
+    headers["Authorization"] = f"Bearer {api_key}"
 
     payload = {
-        "model": os.getenv("MODEL", "gpt-4-turbo"),
+        "model": MODEL_ID,
         "messages": [
             {"role": "user", "content": prompt}
         ],
         "stream": False
     }
 
-    # Log request details for debugging
+    # Log request details
     logging.info(f"Sending POST to {ENDPOINT_URL}")
     logging.info(f"Request Headers: {headers}")
     logging.info(f"Request Payload: {payload}")
 
-    response = requests.post(f"{ENDPOINT_URL}/chat/completions", headers=headers, data=json.dumps(payload))
+    response = None
+    reply = None
+    try:
+        response = requests.post(f"{ENDPOINT_URL}", headers=headers, data=json.dumps(payload))
+    except requests.exceptions.HTTPError as errh:
+        reply = f"HTTP error: {errh}"
+    except requests.exceptions.ConnectionError as errc:
+        reply = f"Connection error: {errc}"
+    except requests.exceptions.Timeout as errt:
+        reply = f"Timeout error: {errt}"
+    except requests.exceptions.RequestException as err:
+        reply = f"Unexpected error: {err}"
 
-    if response.status_code == 200:
-        data = response.json()
-        try:
-            reply = data["choices"][0]["message"]["content"]
-        except (KeyError, IndexError):
-            reply = "Model returned an unexpected response."
-    else:
-        # Log error details
-        logging.error(f"Error from server: {response.status_code} - {response.text}")
-        reply = f"Error: {response.status_code} - {response.text}"
+    if response is not None:
+        # logging.info(f"Response Status Code: {response.status_code}")
+        # logging.info(f"Response Headers: {response.headers}")
+        # logging.info(f"Response Body: {response.text}")
+        if response.status_code == 200:
+            data = response.json()
+            try:
+                reply = data["choices"][0]["message"]["content"]
+            except (KeyError, IndexError):
+                reply = "Model returned an unexpected response."
+        elif response.status_code == 400:
+            reply = f"Connect Error: {response.status_code} - {response.text}"
+        elif response.status_code == 500:
+            reply = f"Error from server: {response.status_code} - {response.text}"
+        else:
+            # Log error details
+            reply = f"Error from server: {response.status_code} - {response.text}"
+            logging.error(f"Error from server: {response.status_code} - {response.text}")
 
     # Return result
     return f"""
     <html>
-        <head><title>Ask the Model</title></head>
+        <head><title>Ask the AI Model</title></head>
         <body>
-            <h1>Ask the Magic Backpack 🧙‍♂️🎒</h1>
-            <form method="post" action="/ask">
-                <textarea name="prompt" rows="5" cols="60" placeholder="Enter your question here...">{prompt}</textarea><br><br>
+            <h1>Ask the AI Model</h1>
+            <form method="post" action="/ask" onsubmit="document.getElementById('loader').style.display='block'">
+                <textarea name="prompt" autofocus="autofocus" rows="5" cols="60" placeholder="Enter your question here..."
+                onkeydown="if(event.key==='Enter'&&!event.shiftKey){{event.preventDefault();this.form.submit();}}"></textarea><br><br>
                 <input type="submit" value="Ask">
             </form>
+            <h2>You Asked:</h2>
+            <p>{prompt}</p>
             <hr>
             <h2>Model's Reply:</h2>
             <p>{reply}</p>
         </body>
    </html>
    """
-
-@app.get("/health")
-async def health():
-    return {"status": "ok"}

samples/managed-llm/compose.dev.yaml

Lines changed: 1 addition & 1 deletion
@@ -4,6 +4,6 @@ services:
       file: compose.yaml
       service: app
     environment:
-      - ENDPOINT_URL=https://api.openai.com/v1 # endpoint of the LLM used for local testing
+      - ENDPOINT_URL=https://api.openai.com/v1/chat/completions # endpoint of the LLM used for local testing
       - OPENAI_API_KEY=${OPENAI_API_KEY} # your OpenAI API key for local testing
       - MODEL=gpt-4-turbo # LLM model ID used for local testing
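Because app.py now posts to ENDPOINT_URL verbatim (the /chat/completions suffix is no longer appended in code), the dev override must carry the full path. A hypothetical smoke test for this local configuration, assuming OPENAI_API_KEY is exported in the shell:

import os
import requests

# Same defaults as the dev compose file; "ping" is an arbitrary test prompt
url = os.getenv("ENDPOINT_URL", "https://api.openai.com/v1/chat/completions")
resp = requests.post(
    url,
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": os.getenv("MODEL", "gpt-4-turbo"),
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=30,
)
print(resp.status_code, resp.json()["choices"][0]["message"]["content"])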

samples/managed-llm/compose.yaml

Lines changed: 16 additions & 12 deletions
@@ -7,23 +7,27 @@ services:
       - "8000:8000"
     restart: always
     environment:
-      - ENDPOINT_URL=http://llm-gateway:80 # endpoint to the LLM gateway service
-      - OPENAI_API_KEY=optionalkey # this value will be ignored in production when using the gateway
-      - MODEL=${MODEL} # LLM model ID used for the gateway
+      - ENDPOINT_URL=http://llm-gateway/api/v1/chat/completions # endpoint to the gateway service
+      - MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0 # LLM model ID used for the gateway
+      - OPENAI_API_KEY=FAKE_TOKEN # this value will be ignored in production when using the gateway
     healthcheck:
-      test: ["CMD", "wget", "--spider", "http://localhost:8000/health"]
+      test: ["CMD", "python3", "-c", "import sys, urllib.request; urllib.request.urlopen(sys.argv[1]).read()", "http://localhost:8000/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+      start_period: 5s
 
-  # Defang OpenAI Access Gateway
-  # This service is used to route requests to the LLM API
+  # # Defang OpenAI Access Gateway
+  # # This service is used to route requests to the LLM API
   llm-gateway:
-    x-defang-llm: true # this must be set to true for the LLM gateway to work
+    x-defang-llm: true # this must be set to true in BYOC for the gateway to work
     image: defangio/openai-access-gateway:latest
     ports:
       - target: 80
         published: 80
         protocol: tcp
-        mode: ingress
-    env_file:
-      - .env
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:80/health"]
+        mode: host
+    environment:
+      - OPENAI_API_KEY=FAKE_TOKEN
+      - USE_MODEL_MAPPING=false
+      - DEBUG=true
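The new healthcheck runs inside the app container, so it can only use what the python:3.12-slim base image ships; unlike the previous alpine image, slim has no wget, which is presumably why the check became a python3 one-liner, and it probes / because the /health route was removed from app.py. Unrolled for readability, the one-liner is equivalent to:

import sys
import urllib.request

# Fetch the URL passed as the first argument; any connection failure or
# HTTP error status raises, so the process exits non-zero and the check fails
urllib.request.urlopen(sys.argv[1]).read()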
