Commit 3603399

add test

1 parent: 90889fd

3 files changed: +43, -5 lines


examples/server/tests/unit/test_chat_completion.py

Lines changed: 3 additions & 3 deletions
@@ -83,7 +83,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
 def test_chat_completion_with_openai_library():
     global server
     server.start()
-    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}")
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
     res = client.chat.completions.create(
         model="gpt-3.5-turbo-instruct",
         messages=[
@@ -170,7 +170,7 @@ def test_chat_completion_with_timings_per_token():
 def test_logprobs():
     global server
     server.start()
-    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}")
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
     res = client.chat.completions.create(
         model="gpt-3.5-turbo-instruct",
         temperature=0.0,
@@ -197,7 +197,7 @@ def test_logprobs():
 def test_logprobs_stream():
     global server
     server.start()
-    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}")
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
     res = client.chat.completions.create(
         model="gpt-3.5-turbo-instruct",
         temperature=0.0,
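Note on the change above: the OpenAI Python client joins paths such as /chat/completions onto base_url, so pointing it at the /v1 prefix makes every request hit the server's OpenAI-compatible routes. The new /v1/completions tests below depend on this, since the server's bare /completions path is its own native API rather than the OAI-compatible one. A minimal sketch of the URL construction, assuming a server at localhost:8080 (address is illustrative):

    # Sketch: how the OpenAI client builds request URLs from base_url.
    # localhost:8080 is an assumed address; llama.cpp exposes its
    # OpenAI-compatible endpoints under the /v1 prefix.
    from openai import OpenAI

    client = OpenAI(api_key="dummy", base_url="http://localhost:8080/v1")
    # client.chat.completions.create(...) POSTs to
    #     http://localhost:8080/v1/chat/completions
    # With base_url="http://localhost:8080" it would POST to
    #     http://localhost:8080/chat/completions (no /v1) instead.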

examples/server/tests/unit/test_completion.py

Lines changed: 35 additions & 0 deletions
@@ -1,5 +1,6 @@
 import pytest
 import time
+from openai import OpenAI
 from utils import *
 
 server = ServerPreset.tinyllama2()
@@ -85,6 +86,40 @@ def test_completion_stream_vs_non_stream():
     assert content_stream == res_non_stream.body["content"]
 
 
+def test_completion_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    res = client.completions.create(
+        model="davinci-002",
+        prompt="I believe the meaning of life is",
+        max_tokens=8,
+    )
+    assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
+    assert res.choices[0].finish_reason == "length"
+    assert res.choices[0].text is not None
+    assert match_regex("(going|bed)+", res.choices[0].text)
+
+
+def test_completion_stream_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    res = client.completions.create(
+        model="davinci-002",
+        prompt="I believe the meaning of life is",
+        max_tokens=8,
+        stream=True,
+    )
+    output_text = ''
+    for data in res:
+        choice = data.choices[0]
+        if choice.finish_reason is None:
+            assert choice.text is not None
+            output_text += choice.text
+    assert match_regex("(going|bed)+", output_text)
+
+
 @pytest.mark.parametrize("n_slots", [1, 2])
 def test_consistent_result_same_seed(n_slots: int):
     global server
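The two new tests above exercise the legacy completions endpoint through the OpenAI client; the same call can be made over raw HTTP. A rough equivalent using requests, assuming a llama.cpp server at localhost:8080 (the address, and anything beyond the standard OpenAI completions schema, are assumptions):

    # Sketch: raw-HTTP equivalent of the non-streaming test above.
    # localhost:8080 is an assumed address for a running llama.cpp server.
    import requests

    resp = requests.post(
        "http://localhost:8080/v1/completions",
        json={
            "model": "davinci-002",  # llama.cpp serves one model; the name does not select it
            "prompt": "I believe the meaning of life is",
            "max_tokens": 8,
        },
    )
    body = resp.json()
    assert body["choices"][0]["finish_reason"] == "length"
    print(body["choices"][0]["text"])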

examples/server/utils.hpp

Lines changed: 5 additions & 2 deletions
@@ -570,8 +570,11 @@ static json oaicompat_completion_params_parse(const json & body) {
     }
 
     // Params supported by OAI but unsupported by llama.cpp
-    if (body.contains("best_of")) {
-        throw std::runtime_error("Unsupported param: best_of");
+    static const std::vector<std::string> unsupported_params { "best_of", "echo", "suffix" };
+    for (const auto & param : unsupported_params) {
+        if (body.contains(param)) {
+            throw std::runtime_error("Unsupported param: " + param);
+        }
     }
 
     // Copy remaining properties to llama_params
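With this change, best_of, echo, and suffix are all rejected with an error naming the offending parameter, where previously only best_of was. As a sketch of what a caller would observe: the exact exception class and message formatting depend on the OpenAI client version, so treat the details below as assumptions:

    # Sketch: sending a parameter the server now rejects.
    # The server's error payload carries "Unsupported param: echo";
    # how the client surfaces it (APIError here) is an assumption.
    from openai import OpenAI, APIError

    client = OpenAI(api_key="dummy", base_url="http://localhost:8080/v1")
    try:
        client.completions.create(
            model="davinci-002",
            prompt="I believe the meaning of life is",
            echo=True,  # any of best_of / echo / suffix triggers the error
        )
    except APIError as e:
        print(e)  # expected to mention "Unsupported param: echo"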
