9
9
from langchain .schema import HumanMessage , AIMessage , SystemMessage
10
10
from langchain_openai import ChatOpenAI
11
11
12
-
13
12
settings = AppSettings .load ("./settings.yml" )
14
13
print ("App settings:" )
15
14
rich .print (settings )
26
25
# Mistral-family models (mistral/mixtral) need a different conversation
# layout than other chat models: they require a strict user -> ai -> user
# alternation, so the system instruction must be folded into the first user
# message when the `context` list is constructed in `inference` below.
# Detect such models here, once, from the configured model name.
MISTRAL_REGEX = re.compile(r".*mi(s|x)tral.*", re.IGNORECASE)

IS_MISTRAL_MODEL = bool(MISTRAL_REGEX.match(settings.model_name))
if IS_MISTRAL_MODEL:
    print(
        "Detected Mistral model - will alter LangChain conversation format appropriately."
    )
32
33
33
34
llm = ChatOpenAI (
34
35
base_url = urljoin (backend_url , "v1" ),
35
- model = settings .model_name ,
36
+ model = settings .model_name ,
36
37
openai_api_key = "required-but-not-used" ,
37
38
temperature = settings .llm_temperature ,
38
39
max_tokens = settings .llm_max_tokens ,
44
45
streaming = True ,
45
46
)
46
47
47
- def inference (latest_message , history ):
48
48
49
+ def inference (latest_message , history ):
49
50
# Check backend health and warn the user on error
50
51
try :
51
52
response = requests .get (backend_health_endpoint , timeout = 5 )
@@ -68,7 +69,6 @@ def inference(latest_message, history):
68
69
# In this case backend is probably still busy downloading model weights
69
70
raise gr .Error ("Backend not ready yet - please try again later" )
70
71
71
-
72
72
try :
73
73
# To handle Mistral models we have to add the model instruction to
74
74
# the first user message since Mistral requires user -> ai -> user
@@ -78,7 +78,9 @@ def inference(latest_message, history):
78
78
context .append (SystemMessage (content = settings .model_instruction ))
79
79
for i , (human , ai ) in enumerate (history ):
80
80
if IS_MISTRAL_MODEL and i == 0 :
81
- context .append (HumanMessage (content = f"{ settings .model_instruction } \n \n { human } " ))
81
+ context .append (
82
+ HumanMessage (content = f"{ settings .model_instruction } \n \n { human } " )
83
+ )
82
84
else :
83
85
context .append (HumanMessage (content = human ))
84
86
context .append (AIMessage (content = ai ))
@@ -98,7 +100,9 @@ def inference(latest_message, history):
98
100
# For all other errors notify user and log a more detailed warning
99
101
except Exception as err :
100
102
warnings .warn (f"Exception encountered while generating response: { err } " )
101
- raise gr .Error ("Unknown error encountered - see application logs for more information." )
103
+ raise gr .Error (
104
+ "Unknown error encountered - see application logs for more information."
105
+ )
102
106
103
107
104
108
# UI colour theming
@@ -119,7 +123,7 @@ def inference(latest_message, history):
119
123
120
124
121
125
# Build main chat interface
122
- gr .ChatInterface (
126
+ with gr .ChatInterface (
123
127
inference ,
124
128
chatbot = gr .Chatbot (
125
129
# Height of conversation window in CSS units (string) or pixels (int)
@@ -139,4 +143,6 @@ def inference(latest_message, history):
139
143
analytics_enabled = False ,
140
144
theme = theme ,
141
145
css = css_overrides ,
142
- ).queue ().launch (server_name = "0.0.0.0" )
146
+ ) as app :
147
+ # app.launch(server_name="0.0.0.0")
148
+ app .launch ()
0 commit comments