@@ -34,7 +34,7 @@ def test_infill_with_input_extra():
3434 "input_suffix" : "}\n " ,
3535 })
3636 assert res .status_code == 200
37- assert match_regex ("(cuts|Jimmy|mom|came|into|the|room )+" , res .body ["content" ])
37+ assert match_regex ("(help|find|band )+" , res .body ["content" ])
3838
3939
4040@pytest .mark .parametrize ("input_extra" , [
@@ -55,3 +55,23 @@ def test_invalid_input_extra_req(input_extra):
5555 })
5656 assert res .status_code == 400
5757 assert "error" in res .body
58+
59+
@pytest.mark.skipif(not is_slow_test_allowed(), reason="skipping slow test")
def test_with_qwen_model():
    """Slow end-to-end infill check against a real CodeQwen model.

    Downloads the quantized GGUF from Hugging Face and starts the server
    with it, so this only runs when slow tests are explicitly enabled.
    Verifies that the /infill endpoint completes the C snippet with the
    identifier supplied via ``input_extra``.
    """
    global server
    # Clear any locally-configured model so the HF repo/file below is used.
    server.model_file = None
    server.model_hf_repo = "Qwen/CodeQwen1.5-7B-Chat-GGUF"
    server.model_hf_file = "codeqwen-1_5-7b-chat-q2_k.gguf"
    # Generous timeout: first run has to download and load a 7B model.
    server.start(timeout_seconds=600)

    payload = {
        "prompt": "Complete this",
        "input_extra": [{
            "filename": "llama.h",
            "text": "LLAMA_API int32_t llama_n_threads();\n",
        }],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n    int n_threads = llama_",
        "input_suffix": "}\n",
    }
    res = server.make_request("POST", "/infill", data=payload)

    assert res.status_code == 200
    # The extra context declared llama_n_threads(), so the completion
    # should contain that identifier.
    assert "n_threads" in res.body["content"]
0 commit comments