File tree Expand file tree Collapse file tree 3 files changed +18
-5
lines changed Expand file tree Collapse file tree 3 files changed +18
-5
lines changed Original file line number Diff line number Diff line change @@ -2591,11 +2591,15 @@ int main(int argc, char ** argv) {
25912591 return false ;
25922592 };
25932593
2594- auto middleware_server_state = [&state](const httplib::Request &, httplib::Response & res) {
2594+ auto middleware_server_state = [&res_error, & state](const httplib::Request & req , httplib::Response & res) {
25952595 server_state current_state = state.load ();
25962596 if (current_state == SERVER_STATE_LOADING_MODEL) {
2597- res.set_content (" <html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>" , " text/html; charset=utf-8" );
2598- res.status = 503 ;
2597+ if (req.path == " /" ){
2598+ res.set_content (" <html><body>The model is loading. Please wait.<br/>The user interface will appear soon.</body></html>" , " text/html; charset=utf-8" );
2599+ res.status = 503 ;
2600+ } else {
2601+ res_error (res, format_error_response (" Loading model" , ERROR_TYPE_UNAVAILABLE));
2602+ }
25992603 return false ;
26002604 }
26012605 return true ;
Original file line number Diff line number Diff line change @@ -105,8 +105,16 @@ Feature: llama.cpp server
105105 Given first token is removed
106106 Then tokens can be detokenized
107107
108+ Scenario : Tokenize with pieces
109+ When tokenizing with pieces:
110+ """
111+ What is the capital of Germany?
112+ 媽
113+ """
114+ Then tokens are given with pieces
115+
108116 Scenario : Models available
109117 Given available models
110118 Then 1 models are supported
111119 Then model 0 is identified by tinyllama-2
112- Then model 0 is trained on 128 tokens context
120+ Then model 0 is trained on 128 tokens context
Original file line number Diff line number Diff line change @@ -1208,6 +1208,7 @@ async def wait_for_slots_status(context,
12081208 while True :
12091209 async with await session .get (f'{ base_url } /slots' , params = params ) as slots_response :
12101210 status_code = slots_response .status
1211+ print (await slots_response .text ())
12111212 slots = await slots_response .json ()
12121213 if context .debug :
12131214 print (f"slots responses { slots } \n " )
@@ -1372,4 +1373,4 @@ def server_log(in_stream, out_stream):
13721373 thread_stderr = threading .Thread (target = server_log , args = (context .server_process .stderr , sys .stderr ))
13731374 thread_stderr .start ()
13741375
1375- print (f"server pid={ context .server_process .pid } , behave pid={ os .getpid ()} " )
1376+ print (f"server pid={ context .server_process .pid } , behave pid={ os .getpid ()} " )
You can’t perform that action at this time.
0 commit comments