# Some models disallow the 'system' role in their conversation history by raising errors in their chat prompt template, e.g. see
# https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/cf47bb3e18fe41a5351bc36eef76e9c900847c89/tokenizer_config.json#L42
# Detecting this ahead of time is difficult so for now we use a global variable which stores whether the API has
- # responded with a HTTP 400 error and retry request without system role replaced by
+ # responded with an HTTP 400 error and formats all subsequent requests to avoid using a system role.
INCLUDE_SYSTEM_PROMPT = True

class PossibleSystemPromptException(Exception):
    pass
@@ -48,12 +48,10 @@ class PossibleSystemPromptException(Exception):

def inference(latest_message, history):

-    # Allow mutating global variables
-    global BACKEND_INITIALISED, INCLUDE_SYSTEM_PROMPT
+    # Allow mutating global variable
+    global BACKEND_INITIALISED

    try:
-        # Attempt to handle models which disallow system prompts
-        # Construct conversation history for model prompt
        if INCLUDE_SYSTEM_PROMPT:
            context = [SystemMessage(content=settings.hf_model_instruction)]
        else:
@@ -84,6 +82,9 @@ def inference(latest_message, history):
            response += chunk.content
            yield response

+        # Handle any API errors here. See OpenAI Python client for possible error responses
+        # https://github.com/openai/openai-python/tree/e8e5a0dc7ccf2db19d7f81991ee0987f9c3ae375?tab=readme-ov-file#handling-errors
+
    except openai.BadRequestError as err:
        logger.error("Received BadRequestError from backend API: %s", err)
        message = err.response.json()['message']
@@ -131,14 +132,15 @@ def inference(latest_message, history):
def inference_wrapper(*args):
    """
    Simple wrapper round the `inference` function which catches certain predictable errors
-    such as invalid prompty formats and attempts to mitigate them automatically.
+    such as invalid prompt formats and attempts to mitigate them automatically.
    """
+    # Allow mutating global variable
+    global INCLUDE_SYSTEM_PROMPT
    try:
        for chunk in inference(*args):
            yield chunk
    except PossibleSystemPromptException:
        logger.warning("Disabling system prompt and retrying previous request")
-        global INCLUDE_SYSTEM_PROMPT
        INCLUDE_SYSTEM_PROMPT = False
        for chunk in inference(*args):
            yield chunk
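
To make the control flow easier to follow, here is a minimal, self-contained sketch of the retry pattern this diff implements: a generator raises `PossibleSystemPromptException` when a request that included a system prompt fails, and a wrapper flips the global flag and replays the request. The real code streams from an OpenAI-compatible backend and inspects the HTTP 400 response body before deciding; `fake_chat_api` below is a hypothetical stand-in used only for illustration.

# Minimal sketch of the fallback pattern above (stand-in names, not the real backend).
import logging

logger = logging.getLogger(__name__)

INCLUDE_SYSTEM_PROMPT = True

class PossibleSystemPromptException(Exception):
    pass

def fake_chat_api(context):
    # Stand-in for the streaming backend: rejects any 'system' message,
    # like the Mistral chat template linked at the top of this file.
    if any(m.startswith("system:") for m in context):
        raise RuntimeError("Conversation roles must alternate user/assistant/...")
    yield "ok"

def inference(latest_message):
    # Build the prompt with or without a system message, mirroring the
    # INCLUDE_SYSTEM_PROMPT branch in the real `inference` function.
    context = (["system: be helpful"] if INCLUDE_SYSTEM_PROMPT else [])
    context.append(f"user: {latest_message}")
    try:
        yield from fake_chat_api(context)
    except RuntimeError:
        # The real code checks the 400 response message; here any failure
        # while a system prompt was included triggers the retry path.
        if INCLUDE_SYSTEM_PROMPT:
            raise PossibleSystemPromptException()
        raise

def inference_wrapper(latest_message):
    global INCLUDE_SYSTEM_PROMPT
    try:
        yield from inference(latest_message)
    except PossibleSystemPromptException:
        logger.warning("Disabling system prompt and retrying previous request")
        INCLUDE_SYSTEM_PROMPT = False
        yield from inference(latest_message)

# print(list(inference_wrapper("hi")))  # first attempt fails, retry yields ["ok"]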