@@ -14,7 +14,7 @@ def create_server():
 @pytest.mark.parametrize(
     "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason",
     [
-        ("llama-2", "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length"),
+        (None, "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length"),
         ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length"),
     ]
 )
@@ -30,6 +30,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
         ],
     })
     assert res.status_code == 200
+    assert res.body["model"] == (model if model is not None else server.model_alias)
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
     choice = res.body["choices"][0]
@@ -39,17 +40,17 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte


 @pytest.mark.parametrize(
-    "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,truncated",
+    "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason",
     [
-        ("llama-2", "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, False),
-        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, False),
+        ("llama-2", "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length"),
+        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length"),
     ]
 )
-def test_chat_completion_stream(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, truncated):
+def test_chat_completion_stream(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason):
     global server
+    server.model_alias = None
     server.start()
     res = server.make_stream_request("POST", "/chat/completions", data={
-        "model": model,
         "max_tokens": max_tokens,
         "messages": [
             {"role": "system", "content": system_prompt},
@@ -60,16 +61,13 @@ def test_chat_completion_stream(model, system_prompt, user_prompt, max_tokens, r
     content = ""
     for data in res:
         choice = data["choices"][0]
+        assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if choice["finish_reason"] in ["stop", "length"]:
             assert data["usage"]["prompt_tokens"] == n_prompt
             assert data["usage"]["completion_tokens"] == n_predicted
             assert "content" not in choice["delta"]
             assert match_regex(re_content, content)
-            # FIXME: not sure why this is incorrect in stream mode
-            # if truncated:
-            #     assert choice["finish_reason"] == "length"
-            # else:
-            #     assert choice["finish_reason"] == "stop"
+            assert choice["finish_reason"] == finish_reason
         else:
             assert choice["finish_reason"] is None
             content += choice["delta"]["content"]
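
The new assertions all hinge on how the server fills in the "model" field of the response. Below is a minimal, self-contained sketch of the fallback order these tests appear to exercise (explicit "model" from the request, then the server's alias, then the OAI-compat default), using a hypothetical resolve_model_name helper and a placeholder default value; the real logic lives in the server implementation, not in this test file.

from typing import Optional

# Placeholder value; the test above only checks that the default contains "gpt-3.5".
DEFAULT_OAICOMPAT_MODEL = "gpt-3.5-turbo"

def resolve_model_name(request_model: Optional[str], model_alias: Optional[str]) -> str:
    # Hypothetical helper mirroring the fallback the assertions rely on:
    # the request's "model" if given, else the server's alias, else the default.
    if request_model is not None:
        return request_model
    if model_alias is not None:
        return model_alias
    return DEFAULT_OAICOMPAT_MODEL

# The three situations covered by the tests:
assert resolve_model_name("codellama70b", "some-alias") == "codellama70b"  # explicit model in request
assert resolve_model_name(None, "some-alias") == "some-alias"              # model=None -> server alias
assert "gpt-3.5" in resolve_model_name(None, None)                         # no model, no alias -> default

Setting server.model_alias = None before server.start() in the streaming test is what forces the third branch, which is why that test can only check for the default name.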