@@ -57,6 +57,9 @@ def __init__(self, *args, **kwargs):
57
57
58
58
self .server = Server (self )
59
59
60
+ # For the 01. This lets the OAI compatible server accumulate context before responding.
61
+ self .context_mode = False
62
+
60
63
async def input (self , chunk ):
61
64
"""
62
65
Accumulates LMC chunks onto interpreter.messages.
@@ -773,6 +776,14 @@ async def chat_completion(request: ChatCompletionRequest):
773
776
# Handle special STOP token
774
777
return
775
778
779
+ if last_message .content == "{CONTEXT_MODE_ON}" :
780
+ async_interpreter .context_mode = True
781
+ return
782
+
783
+ if last_message .content == "{CONTEXT_MODE_OFF}" :
784
+ async_interpreter .context_mode = False
785
+ return
786
+
776
787
if type (last_message .content ) == str :
777
788
async_interpreter .messages .append (
778
789
{
@@ -812,7 +823,9 @@ async def chat_completion(request: ChatCompletionRequest):
812
823
}
813
824
)
814
825
815
- if os .getenv ("INTERPRETER_SERVER_REQUIRE_START" , False ):
826
+ if async_interpreter .context_mode :
827
+ # In context mode, we only respond if we received a {START} message
828
+ # Otherwise, we're just accumulating context
816
829
if last_message .content != "{START}" :
817
830
return
818
831
if async_interpreter .messages [- 1 ]["content" ] == "{START}" :
0 commit comments