Commit 1216494

Author: sd109
Improve web app error handling

1 parent b949624 commit 1216494

File tree

1 file changed: +133 -60 lines changed

chart/web-app/app.py

Lines changed: 133 additions & 60 deletions
@@ -1,37 +1,46 @@
 import requests
-import warnings
 import re
-import rich
+import logging
 import gradio as gr
 from urllib.parse import urljoin
 from config import AppSettings

 from langchain.schema import HumanMessage, AIMessage, SystemMessage
 from langchain_openai import ChatOpenAI
+import openai

-print("\n Starting app \n---------------\n")
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+logger.info("Starting app")

 settings = AppSettings.load("./settings.yml")
-print("App settings:")
-rich.print(settings)
+logger.info("App settings: %s", settings)

 backend_url = str(settings.backend_url)
 backend_health_endpoint = urljoin(backend_url, "/health")
-backend_initialised = False
-
-# NOTE(sd109): The Mistral family of models explicitly require a chat
-# history of the form user -> ai -> user -> ... and so don't like having
-# a SystemPrompt at the beginning. Since these models seem to be the
-# best around right now, it makes sense to treat them as special and make
-# sure the web app works correctly with them. To do so, we detect when a
-# mistral model is specified using this regex and then handle it explicitly
-# when contructing the `context` list in the `inference` function below.
-MISTRAL_REGEX = re.compile(r".*mi(s|x)tral.*", re.IGNORECASE)
-IS_MISTRAL_MODEL = MISTRAL_REGEX.match(settings.model_name) is not None
-if IS_MISTRAL_MODEL:
-    print(
-        "Detected Mistral model - will alter LangChain conversation format appropriately."
-    )
+BACKEND_INITIALISED = False
+
+# # NOTE(sd109): The Mistral family of models explicitly require a chat
+# # history of the form user -> ai -> user -> ... and so don't like having
+# # a SystemPrompt at the beginning. Since these models seem to be the
+# # best around right now, it makes sense to treat them as special and make
+# # sure the web app works correctly with them. To do so, we detect when a
+# # mistral model is specified using this regex and then handle it explicitly
+# # when contructing the `context` list in the `inference` function below.
+# MISTRAL_REGEX = re.compile(r".*mi(s|x)tral.*", re.IGNORECASE)
+# IS_MISTRAL_MODEL = MISTRAL_REGEX.match(settings.model_name) is not None
+# if IS_MISTRAL_MODEL:
+#     print(
+#         "Detected Mistral model - will alter LangChain conversation format appropriately."
+#     )
+
+# Some models disallow the 'system' role in their conversation history by raising errors in their chat prompt template, e.g. see
+# https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/cf47bb3e18fe41a5351bc36eef76e9c900847c89/tokenizer_config.json#L42
+# Detecting this ahead of time is difficult, so for now we use a global variable which records whether the API has
+# responded with an HTTP 400 error, so that later requests can be retried with the system role folded into the first user message instead.
+INCLUDE_SYSTEM_PROMPT = True

 llm = ChatOpenAI(
     base_url=urljoin(backend_url, "v1"),
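As a side note, the two `urljoin` calls above behave differently because one path is absolute and one is relative. A minimal sketch, using a hypothetical in-cluster hostname (the real value comes from `settings.backend_url` in settings.yml):

```python
from urllib.parse import urljoin

# Hypothetical backend URL, for illustration only.
backend_url = "http://llm-backend.default.svc:8000"

# A leading slash replaces any existing path on the base URL.
print(urljoin(backend_url, "/health"))  # -> http://llm-backend.default.svc:8000/health

# A relative path is resolved against the base URL.
print(urljoin(backend_url, "v1"))       # -> http://llm-backend.default.svc:8000/v1
```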
@@ -50,47 +59,92 @@

 def inference(latest_message, history):
     # Check backend health and warn the user on error
-    try:
-        response = requests.get(backend_health_endpoint, timeout=5)
-        if response.status_code == 200:
-            global backend_initialised
-            if not backend_initialised:
-                # Record the fact that backend was up at one point so we know that
-                # any future errors are not related to slow model initialisation
-                backend_initialised = True
-        else:
-            # If the server's running (i.e. we get a response) but it's not an HTTP 200
-            # we just hope Kubernetes reconciles things for us eventually..
-            raise gr.Error("Backend unhealthy - please try again later")
-    except Exception as err:
-        warnings.warn(f"Error while checking backend health: {err}")
-        if backend_initialised:
-            # If backend was previously reachable then something unexpected has gone wrong
-            raise gr.Error("Backend unreachable")
-        else:
-            # In this case backend is probably still busy downloading model weights
-            raise gr.Error("Backend not ready yet - please try again later")
+    # try:
+    #     response = requests.get(backend_health_endpoint, timeout=5)
+    #     response_code = response.status_code
+    #     if response_code == 200:
+    #         global backend_initialised
+    #         if not backend_initialised:
+    #             # Record the fact that backend was up at one point so we know that
+    #             # any future errors are not related to slow model initialisation
+    #             backend_initialised = True
+    #     elif response_code >= 400 and response_code < 500:
+    #         logging.warn(f"Received HTTP {response_code} response from backend. Full response: {response.text}")
+    #     else:
+    #         # If the server's running (i.e. we get a response) but it's not an HTTP 200
+    #         # we just hope Kubernetes reconciles things for us eventually..
+    #         raise gr.Error("Backend unhealthy - please try again later")
+    # except Exception as err:
+    #     warnings.warn(f"Error while checking backend health: {err}")
+    #     if backend_initialised:
+    #         # If backend was previously reachable then something unexpected has gone wrong
+    #         raise gr.Error("Backend unreachable")
+    #     else:
+    #         # In this case backend is probably still busy downloading model weights
+    #         raise gr.Error("Backend not ready yet - please try again later")
+
+    # try:
+    #     # To handle Mistral models we have to add the model instruction to
+    #     # the first user message since Mistral requires user -> ai -> user
+    #     # chat format and therefore doesn't allow system prompts.
+    #     context = []
+    #     if not IS_MISTRAL_MODEL:
+    #         context.append(SystemMessage(content=settings.model_instruction))
+    #     for i, (human, ai) in enumerate(history):
+    #         if IS_MISTRAL_MODEL and i == 0:
+    #             context.append(
+    #                 HumanMessage(content=f"{settings.model_instruction}\n\n{human}")
+    #             )
+    #         else:
+    #             context.append(HumanMessage(content=human))
+    #         context.append(AIMessage(content=ai))
+    #     context.append(HumanMessage(content=latest_message))
+
+    #     response = ""
+    #     for chunk in llm.stream(context):
+    #         # print(chunk)
+    #         # NOTE(sd109): For some reason the '>' character breaks the UI
+    #         # so we need to escape it here.
+    #         # response += chunk.content.replace('>', '\>')
+    #         # UPDATE(sd109): Above bug seems to have been fixed as of gradio 4.15.0
+    #         # but keeping this note here incase we enounter it again
+    #         response += chunk.content
+    #         yield response
+
+    # # For all other errors notify user and log a more detailed warning
+    # except Exception as err:
+    #     warnings.warn(f"Exception encountered while generating response: {err}")
+    #     raise gr.Error(
+    #         "Unknown error encountered - see application logs for more information."
+    #     )
+
+
+    # Allow mutating global variables
+    global BACKEND_INITIALISED, INCLUDE_SYSTEM_PROMPT

     try:
-        # To handle Mistral models we have to add the model instruction to
-        # the first user message since Mistral requires user -> ai -> user
-        # chat format and therefore doesn't allow system prompts.
-        context = []
-        if not IS_MISTRAL_MODEL:
-            context.append(SystemMessage(content=settings.model_instruction))
+        # Attempt to handle models which disallow system prompts
+        # Construct conversation history for model prompt
+        if INCLUDE_SYSTEM_PROMPT:
+            context = [SystemMessage(content=settings.model_instruction)]
+        else:
+            context = []
         for i, (human, ai) in enumerate(history):
-            if IS_MISTRAL_MODEL and i == 0:
-                context.append(
-                    HumanMessage(content=f"{settings.model_instruction}\n\n{human}")
-                )
-            else:
-                context.append(HumanMessage(content=human))
-            context.append(AIMessage(content=ai))
+            if not INCLUDE_SYSTEM_PROMPT and i == 0:
+                # Mimic system prompt by prepending it to first human message
+                human = f"{settings.model_instruction}\n\n{human}"
+            context.append(HumanMessage(content=human))
+            context.append(AIMessage(content=(ai or "")))
         context.append(HumanMessage(content=latest_message))

         response = ""
         for chunk in llm.stream(context):
-            # print(chunk)
+
+            # If this is our first successful response from the backend
+            # then update the status variable
+            if not BACKEND_INITIALISED and len(response) > 0:
+                BACKEND_INITIALISED = True
+
             # NOTE(sd109): For some reason the '>' character breaks the UI
             # so we need to escape it here.
             # response += chunk.content.replace('>', '\>')
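To make the new prompt-construction logic concrete, here is a small standalone sketch of the `context` list that the loop above produces, using made-up history and instruction text in place of the app's real settings:

```python
from langchain.schema import AIMessage, HumanMessage, SystemMessage

model_instruction = "You are a helpful assistant."  # stand-in for settings.model_instruction
history = [("Hello", "Hi! How can I help?")]        # Gradio-style (user, assistant) pairs
latest_message = "What is Kubernetes?"

def build_context(include_system_prompt: bool):
    # Same shape as the logic in inference(): optional system message,
    # then alternating human/AI turns, then the latest user message.
    context = [SystemMessage(content=model_instruction)] if include_system_prompt else []
    for i, (human, ai) in enumerate(history):
        if not include_system_prompt and i == 0:
            # Fold the instruction into the first user turn instead of a system message
            human = f"{model_instruction}\n\n{human}"
        context.append(HumanMessage(content=human))
        context.append(AIMessage(content=(ai or "")))
    context.append(HumanMessage(content=latest_message))
    return context

print(build_context(True))   # [System, Human, AI, Human]
print(build_context(False))  # [Human(instruction + first message), AI, Human]
```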
@@ -99,12 +153,31 @@ def inference(latest_message, history):
             response += chunk.content
             yield response

-    # For all other errors notify user and log a more detailed warning
+    except openai.BadRequestError as err:
+        logger.error("Received BadRequestError from backend API: %s", err)
+        message = err.response.json()['message']
+        if INCLUDE_SYSTEM_PROMPT:
+            INCLUDE_SYSTEM_PROMPT = False
+            # TODO: Somehow retry same inference step without system prompt
+            pass
+        ui_message = f"API Error received. This usually means the chosen LLM uses an incompatible prompt format. Error message was: {message}"
+        raise gr.Error(ui_message)
+
+    except openai.APIConnectionError as err:
+        if not BACKEND_INITIALISED:
+            logger.info("Backend API not yet ready")
+            gr.Info("Backend not ready - model may still be initialising - please try again later")
+        else:
+            logger.error("Failed to connect to backend API: %s", err)
+            gr.Warning("Failed to connect to backend API")
+
+    except openai.InternalServerError as err:
+        gr.Warning("Internal server error encountered in backend API - see API logs for details.")
+
+    # Catch-all for unexpected exceptions
     except Exception as err:
-        warnings.warn(f"Exception encountered while generating response: {err}")
-        raise gr.Error(
-            "Unknown error encountered - see application logs for more information."
-        )
+        logger.error("Unexpected error during inference: %s", err)
+        raise gr.Error("Unexpected error encountered - see logs for details.")


 # UI colour theming
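The TODO above leaves the retry itself unimplemented. One possible, untested way to approach it is sketched below; `build_context` is a hypothetical helper standing in for the app's own context construction, and `llm` is the existing ChatOpenAI client:

```python
# Hypothetical sketch of the retry suggested by the TODO above.
import openai

def stream_with_fallback(latest_message, history):
    global INCLUDE_SYSTEM_PROMPT
    for attempt in range(2):
        context = build_context(latest_message, history, INCLUDE_SYSTEM_PROMPT)
        try:
            response = ""
            for chunk in llm.stream(context):
                response += chunk.content
                yield response
            return
        except openai.BadRequestError:
            if INCLUDE_SYSTEM_PROMPT:
                # Assume the chat template rejected the 'system' role and retry once
                # with the instruction folded into the first user message instead.
                INCLUDE_SYSTEM_PROMPT = False
                continue
            raise
```

One caveat with this shape: if the backend rejects the request only after some chunks have been streamed, the UI will have already received partial output before the retry restarts the response.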
@@ -146,5 +219,5 @@ def inference(latest_message, history):
     theme=theme,
     css=css_overrides,
 ) as app:
-    # app.launch(server_name="0.0.0.0")
+    # app.launch(server_name="0.0.0.0") # Do we need this for k8s service?
     app.launch()
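Regarding the question in that last comment: when the app runs inside a Kubernetes pod, Gradio's default bind address of 127.0.0.1 is not reachable through the pod's Service, so binding to all interfaces (via the argument or the GRADIO_SERVER_NAME environment variable) is usually needed. A minimal sketch, assuming Gradio's default port:

```python
# Likely needed when served behind a Kubernetes Service; 7860 is Gradio's default port.
app.launch(server_name="0.0.0.0", server_port=7860)
```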
